From 6e53c1bd12019c63715987c0a76c79e868a05401 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 May 2016 20:40:33 +0200 Subject: [PATCH 01/94] Change render target output from PAL8 to BGRA8 --- src/basictypes.h | 6 + src/f_wipe.cpp | 17 +- src/m_misc.cpp | 2 + src/r_draw.cpp | 1106 ++++++++++++++++++++++++++++++-- src/r_draw.h | 12 +- src/r_drawt.cpp | 397 ++++++++++-- src/r_main.cpp | 8 +- src/r_main.h | 28 + src/r_plane.cpp | 24 +- src/r_segs.cpp | 107 ++- src/r_swrenderer.cpp | 4 + src/r_things.cpp | 73 ++- src/textures/canvastexture.cpp | 5 + src/v_draw.cpp | 45 +- src/v_video.cpp | 52 +- src/v_video.h | 14 +- src/win32/fb_d3d9.cpp | 38 +- src/win32/fb_ddraw.cpp | 10 +- src/win32/win32iface.h | 4 +- src/win32/win32video.cpp | 13 + 20 files changed, 1785 insertions(+), 180 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index ff2cd972e..45e33a4a7 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,6 +66,12 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer +#ifndef PALETTEOUTPUT +typedef uint32_t canvas_pixel_t; +#else +typedef BYTE canvas_pixel_t; +#endif + #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a3ceb8d50..c6f20cadb 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -77,8 +77,10 @@ bool wipe_initMelt (int ticks) { int i, r; +#ifdef PALETTEOUTPUT // copy start screen to main screen screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,7 +273,8 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to, *fromold, *fromnew; + canvas_pixel_t *to; + BYTE *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; @@ -298,6 +301,9 @@ bool wipe_doBurn (int ticks) } else { +#ifndef PALETTEOUTPUT + // TO DO: 
RGB32k.All +#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -305,6 +311,7 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; +#endif done = false; } } @@ -335,7 +342,9 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { +#ifdef PALETTEOUTPUT screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); +#endif return true; } else @@ -346,7 +355,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - BYTE *to = screen->GetBuffer(); + canvas_pixel_t *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -387,7 +396,9 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif return true; } return false; @@ -398,8 +409,10 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. 
+#endif // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 87f61f253..79416c31d 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,6 +655,7 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { +#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -743,6 +744,7 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } +#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 80b91ed2d..044910008 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -42,6 +42,9 @@ #include "gi.h" #include "stats.h" #include "x86.h" +#ifndef NO_SSE +#include +#endif #undef RANGECHECK @@ -61,7 +64,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; +canvas_pixel_t *dc_destorg; } int scaledviewwidth; @@ -90,6 +93,7 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +fixed_t dc_light; int dc_x; int dc_yl; int dc_yh; @@ -103,12 +107,13 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -BYTE* dc_dest; +canvas_pixel_t* dc_dest; int dc_count; DWORD vplce[4]; DWORD vince[4]; BYTE* palookupoffse[4]; +fixed_t palookuplight[4]; const BYTE* bufplce[4]; // just for profiling @@ -180,7 +185,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -193,6 +198,10 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + // Determine scaling, // which is the only mapping to be done. 
fracstep = dc_iscale; @@ -212,7 +221,11 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. - *dest = colormap[source[frac>>FRACBITS]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); +#else + *dest = colormap[source[frac >> FRACBITS]]; +#endif dest += pitch; frac += fracstep; @@ -226,7 +239,7 @@ void R_DrawColumnP_C (void) void R_FillColumnP (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; count = dc_count; @@ -235,13 +248,21 @@ void R_FillColumnP (void) dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + { int pitch = dc_pitch; BYTE color = dc_color; do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(color, light); +#else *dest = color; +#endif dest += pitch; } while (--count); } @@ -250,19 +271,39 @@ void R_FillColumnP (void) void R_FillAddColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -271,25 +312,45 @@ void R_FillAddColumn (void) *dest = RGB32k.All[bg & (bg>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillAddClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -304,25 +365,45 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; do { @@ -336,25 +417,45 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillRevSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -368,7 +469,7 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } // @@ -421,7 +522,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -441,6 +542,85 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; +#ifndef PALETTEOUTPUT + + // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) + // I'm not sure if this is really always the case or not. + + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. 
+ if (fuzz) + { + cnt = MIN(FUZZTABLE - fuzz, count); + count -= cnt; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } + +#else + // colormap #6 is used for shading (of 0-31, a bit brighter than average) { // [RH] Make local copies of global vars to try and improve @@ -487,6 +667,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } +#endif } #endif @@ -539,7 +720,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -552,6 +733,34 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + BYTE *colormap = dc_colormap; + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -572,6 +781,7 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // @@ -585,7 +795,7 @@ void R_DrawAddColumnP_C (void) void R_DrawTranslatedColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -593,6 +803,10 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; @@ -607,7 +821,11 @@ void R_DrawTranslatedColumnP_C (void) do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); +#else *dest = colormap[translation[source[frac>>FRACBITS]]]; +#endif dest += pitch; frac += fracstep; @@ -619,7 +837,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTlatedAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -627,11 +845,44 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 
0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -647,12 +898,13 @@ void R_DrawTlatedAddColumnP_C (void) fg = fg2rgb[fg]; bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; dest += pitch; frac += fracstep; } while (--count); } +#endif } // Draw a column whose "color" values are actually translucency @@ -660,7 +912,7 @@ void R_DrawTlatedAddColumnP_C (void) void R_DrawShadedColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -673,6 +925,36 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + + do + { + DWORD alpha = clamp(colormap[source[frac >> 
FRACBITS]], 0, 64); + DWORD inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -690,13 +972,14 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -709,6 +992,34 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -731,13 +1042,14 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { int count; - 
BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -750,6 +1062,35 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -773,13 +1114,14 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -792,6 +1134,34 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 
256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -813,13 +1183,14 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -832,6 +1203,35 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -854,13 +1254,14 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t 
fracstep; @@ -873,6 +1274,34 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -894,13 +1323,14 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -913,6 +1343,35 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red 
- bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -935,6 +1394,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } @@ -967,6 +1427,7 @@ int ds_x1; int ds_x2; lighttable_t* ds_colormap; +//dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1019,6 +1480,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { ds_colormap = colormap; + ds_light = 0; #ifdef X86_ASM if (ds_colormap != ds_curcolormap) { @@ -1062,7 +1524,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1087,9 +1549,64 @@ void R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + int sse_count = count / 4; + count -= sse_count * 4; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_hi, fg_lo); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + if (count == 0) + return; +#endif +#endif + do { // Current texture index in u,v. @@ -1097,7 +1614,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. xfrac += xstep; @@ -1117,7 +1638,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. 
xfrac += xstep; @@ -1133,12 +1658,16 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; int spot; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1160,7 +1689,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1180,7 +1713,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1196,7 +1733,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1214,9 +1751,35 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
+#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); @@ -1229,9 +1792,37 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } else { +#ifndef PALETTEOUTPUT + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1247,6 +1838,7 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } } @@ -1256,7 +1848,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* 
dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1264,6 +1856,8 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1285,12 +1879,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1310,12 +1921,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1330,7 +1958,7 @@ void 
R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1338,6 +1966,8 @@ void R_DrawSpanAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1354,6 +1984,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1363,6 +2010,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1375,6 +2024,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a 
= fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1384,6 +2050,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1396,7 +2064,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1404,6 +2072,8 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1425,6 +2095,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); /* saturating add, same as R_DrawSpanAddClampP_C */ + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1434,6 +2120,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1453,6 +2140,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red +
bg_red, 0, 255); /* saturating add, same as R_DrawSpanAddClampP_C */ + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1462,6 +2165,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1473,7 +2177,16 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); +#endif } // Draw a voxel slab @@ -1492,7 +2205,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) { int x; const BYTE *colormap = slabcolormap; @@ -1666,13 +2379,21 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { - *dest = colormap[source[frac>>bits]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>bits]], light); +#else + *dest = colormap[source[frac >> bits]]; +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1682,19 +2403,83 @@ DWORD vlinec1 () void vlinec4 () { - BYTE *dest =
dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#ifndef NO_SSE + __m128i mlight_hi = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); /* _mm_set_* take lanes high-to-low: hi half = columns 2,3 */ + __m128i mlight_lo = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); /* lo half = columns 0,1 */ + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; +#endif +#endif + do { +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; + BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; + BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; + BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); /* lane 0 (lowest address) = column 0 */ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); /* first operand lands in the low 8 bytes, i.e. dest[0..1] */ + _mm_storeu_si128((__m128i*)dest, fg); + +#else + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] =
shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; +#endif +#else dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +#endif +#endif } #endif @@ -1717,16 +2502,24 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[pix], light); +#else *dest = colormap[pix]; +#endif } frac += fracstep; dest += pitch; @@ -1737,19 +2530,33 @@ DWORD mvlinec1 () void mvlinec4 () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#endif + do { BYTE pix; +#ifndef PALETTEOUTPUT + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; +#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); } @@ -1763,7 +2570,11 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif for (; y < y2; ++y) { @@ -1771,7 +2582,11 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) int x = x1; do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); dest += dc_pitch; } @@ -1781,10 +2596,19 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + 
canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); } @@ -1809,6 +2633,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; for (--x; x >= x1; --x) { @@ -1834,6 +2659,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } rcolormap = lcolormap; dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; } else { @@ -1891,15 +2717,37 @@ fixed_t tmvline1_add () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { +#ifndef PALETTEOUTPUT + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } +#else BYTE pix = source[frac>>bits]; if (pix != 0) { @@ -1908,6 +2756,7 @@ fixed_t tmvline1_add () fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; } +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1917,13 +2766,19 @@ fixed_t tmvline1_add () void tmvline4_add () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = 
dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1931,10 +2786,27 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; /* blend against this column's pixel, not column 0 */ + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = fg2rgb[palookupoffse[i][pix]]; DWORD bg = bg2rgb[dest[i]]; fg = (fg+bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg>>15)]; +#endif } vplce[i] += vince[i]; } @@ -1949,18 +2821,36 @@ fixed_t tmvline1_addclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green +
bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; DWORD b = a; @@ -1970,6 +2860,7 @@ fixed_t tmvline1_addclamp () b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -1980,13 +2871,19 @@ fixed_t tmvline1_addclamp () void tmvline4_addclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1994,6 +2891,22 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; DWORD b = a; @@ -2003,6 +2916,7 @@ void tmvline4_addclamp () b = b - (b >> 5); a |= b; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2017,18 +2931,36 @@ fixed_t tmvline1_subclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD 
*fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -2037,6 +2969,7 @@ fixed_t tmvline1_subclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2047,13 +2980,19 @@ fixed_t tmvline1_subclamp () void tmvline4_subclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2061,6 +3000,22 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 
255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; DWORD b = a; @@ -2069,6 +3024,7 @@ void tmvline4_subclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2083,18 +3039,36 @@ fixed_t tmvline1_revsubclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; DWORD b = a; @@ -2103,6 +3077,7 @@ fixed_t tmvline1_revsubclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2113,13 +3088,19 @@ fixed_t tmvline1_revsubclamp () void tmvline4_revsubclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = 
calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2127,6 +3108,22 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; DWORD b = a; @@ -2135,6 +3132,7 @@ void tmvline4_revsubclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2418,6 +3416,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { dc_colormap += fixedlightlev; } + dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -2443,6 +3442,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHorizP; dc_colormap = identitymap; + dc_light = 0; } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index cb2f68f33..6f7a91154 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -30,6 +30,7 @@ extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" fixed_t dc_light; extern "C" int dc_x; extern "C" int dc_yl; extern "C" int dc_yh; @@ -44,16 +45,17 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE *dc_dest, *dc_destorg; +extern "C" canvas_pixel_t *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; +extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" canvas_pixel_t *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -184,7 +186,7 @@ extern void (*rt_map4cols)(int sx, int yl, int yh); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); +void rt_initcols (canvas_pixel_t *buffer=NULL); void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); @@ -231,13 +233,15 @@ void R_FillSpan (void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; extern "C" lighttable_t* ds_colormap; +//extern "C" dsfixed_t ds_light; +#define ds_light dc_light extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e8faff0ce..f5fc027b5 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -57,8 +57,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; +canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +73,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. void rt_copy1col_c (int hx, int sx, int yl, int yh) { - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -114,6 +114,13 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. 
void rt_copy4cols_c (int sx, int yl, int yh) { +#ifndef PALETTEOUTPUT + // To do: we could do this with SSE using __m128i + rt_copy1col_c(0, sx, yl, yh); + rt_copy1col_c(1, sx + 1, yl, yh); + rt_copy1col_c(2, sx + 2, yl, yh); + rt_copy1col_c(3, sx + 3, yl, yh); +#else int *source; int *dest; int count; @@ -142,14 +149,15 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); +#endif } // Maps one span at hx to the screen at sx. void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -158,13 +166,21 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[*source], light); +#else *dest = colormap[*source]; +#endif source += 4; dest += pitch; } @@ -172,8 +188,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); +#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -183,8 +204,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -193,16 +214,27 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { +#ifndef 
PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; +#endif source += 4; dest += pitch; } @@ -210,6 +242,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -218,6 +260,7 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -227,7 +270,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; + canvas_pixel_t *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -274,7 +317,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; + canvas_pixel_t *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +373,8 @@ void rt_tlate4cols (int sx, int yl, int yh) void rt_add1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -340,13 +383,36 @@ void rt_add1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -358,14 +424,15 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -374,13 +441,40 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -414,6 +508,7 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -433,10 +528,9 @@ void rt_tlateadd4cols (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col (int hx, int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -445,12 +539,37 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -459,15 +578,15 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_c (int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -476,12 +595,40 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val; @@ -504,14 +651,15 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -520,13 +668,36 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -540,14 +711,15 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -556,13 +728,39 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; DWORD b = a; @@ -604,6 +802,7 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. 
@@ -624,8 +823,8 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh) void rt_subclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -634,13 +833,35 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -653,14 +874,15 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -669,13 +891,39 @@ void rt_subclamp4cols (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; @@ -713,6 +961,7 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. 
@@ -733,8 +982,8 @@ void rt_tlatesubclamp4cols (int sx, int yl, int yh) void rt_revsubclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -750,6 +999,28 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -762,14 +1033,15 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -785,6 +1057,32 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -822,6 +1120,7 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1002,7 +1301,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (BYTE *buff) +void rt_initcols (canvas_pixel_t *buff) { int y; @@ -1016,7 +1315,7 @@ void rt_initcols (BYTE *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t fracstep; fixed_t frac; @@ -1077,7 +1376,7 @@ void R_FillColumnHorizP (void) { int count = dc_count; BYTE color = dc_color; - BYTE *dest; + canvas_pixel_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index ce4841a2e..04e798981 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -578,7 +578,7 @@ void R_HighlightPortal (PortalDrawseg* pds) BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - BYTE* pixels = RenderTarget->GetBuffer(); + canvas_pixel_t* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -623,7 +623,7 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; for (int y = Ytop; y <= Ybottom; y++) { @@ -794,10 +794,10 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static BYTE *lastbuff = NULL; + static canvas_pixel_t *lastbuff = NULL; int pitch = RenderTarget->GetPitch(); - BYTE *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_main.h b/src/r_main.h index 24103393d..37a41a763 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,6 +82,34 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) +// Calculate the light multiplier for ds_light +// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap +#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) + +#ifndef PALETTEOUTPUT + +// calculates the light constant passed to the shade_pal_index function +inline uint32_t calc_light_multiplier(dsfixed_t light) +{ + // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? + return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +#endif + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index d749319e3..b385302e5 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,8 +227,14 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
+#ifndef PALETTEOUTPUT + ds_colormap = basecolormap->Maps; + ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); +#else ds_colormap = basecolormap->Maps + (GETPALOOKUP ( GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); + ds_light = 0; +#endif } #ifdef X86_ASM @@ -360,7 +366,7 @@ void R_MapTiltedPlane (int y, int x1) int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - BYTE *fb; + canvas_pixel_t *fb; DWORD u, v; int i; @@ -393,6 +399,7 @@ void R_MapTiltedPlane (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; ds_colormap = tiltlighting[i]; + ds_light = 0; fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -486,7 +493,16 @@ void R_MapTiltedPlane (int y, int x1) void R_MapColoredPlane (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + int count = (spanend[y] - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); +#endif } //========================================================================== @@ -1462,11 +1478,13 @@ void R_DrawSkyPlane (visplane_t *pl) if (fixedcolormap) { dc_colormap = fixedcolormap; + dc_light = 0; } else { fakefixed = true; fixedcolormap = dc_colormap = NormalLight.Maps; + dc_light = 0; } R_DrawSky (pl); @@ -1547,6 +1565,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) @@ -1707,6 +1726,7 @@ void R_DrawTiltedPlane (visplane_t *pl, 
double _xscale, double _yscale, fixed_t if (pl->height.fC() > 0) planelightfloat = -planelightfloat; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 4eb3cb440..1cdb78555 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -178,6 +178,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText if (fixedcolormap == NULL && fixedlightlev < 0) { dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -316,6 +317,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // find positioning texheight = tex->GetScaledHeightDouble(); @@ -633,6 +635,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1066,10 +1069,11 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1117,6 +1121,10 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + 
palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } for(; (x < x2) && (x & 3); ++x) @@ -1130,7 +1138,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1170,7 +1184,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(12/*light*/, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; +#endif } } @@ -1183,7 +1203,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } bad >>= 1; } @@ -1194,7 +1214,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } } @@ -1205,12 +1225,12 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l dovline4(); } - BYTE *i = x+ylookup[d4]+dc_destorg; + canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; for (z = 0; z < 4; ++z) { if 
(y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); } } } @@ -1225,7 +1245,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1416,10 +1442,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = 0; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1431,7 +1458,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1471,7 +1498,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[3] = dc_colormap; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1481,6 +1508,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + 
xoffset) >> FRACBITS); @@ -1553,7 +1581,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) @@ -1572,6 +1600,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1589,10 +1618,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1605,7 +1635,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1645,9 +1675,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1656,7 +1690,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT 
+ dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1694,7 +1734,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#endif } } @@ -1707,7 +1752,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); tmvline1(); } bad >>= 1; @@ -1719,7 +1764,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); vplce[z] = tmvline1(); } } @@ -1731,12 +1776,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); tmvline1(); } } @@ -1750,7 +1795,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); 
+#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else + dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1791,6 +1842,7 @@ void R_RenderSegLoop () dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3194,6 +3246,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) @@ -3242,7 +3295,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3252,7 +3311,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -3267,7 +3332,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; 
diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 07edf25e9..433007acb 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -182,6 +182,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { +#ifdef PALETTEOUTPUT DCanvas *pic = new DSimpleCanvas (width, height); PalEntry palette[256]; @@ -195,6 +196,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; +#endif } //=========================================================================== @@ -311,6 +313,7 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { +#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -334,6 +337,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; +#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 427e61b06..0e55b45f9 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -132,7 +132,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -BYTE *OffscreenColorBuffer; +canvas_pixel_t *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -408,6 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) fixed_t centeryfrac = FLOAT2FIXED(CenterY); dc_colormap = vis->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -544,6 +545,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -592,7 +594,13 @@ void 
R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -603,7 +611,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -619,7 +633,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -654,6 +674,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Do setup for blending. 
dc_colormap = spr->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -2598,10 +2619,8 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle (vissprite_t *vis) { - DWORD *bg2rgb; int spacing; - BYTE *dest; - DWORD fg; + canvas_pixel_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2610,6 +2629,47 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = RenderTarget->GetPitch(); + + for (int x = x1; x < (x1 + countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; // fg_* is already scaled by alpha; bg gets the complement + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +#else + DWORD *bg2rgb; + DWORD fg; + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -2659,6 +2719,7 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } +#endif } extern double BaseYaspectMul;; @@ -3189,12 +3250,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if 
(OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } } OffscreenBufferWidth = width; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 062c3af1d..7388c1306 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,6 +106,10 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); +#ifndef PALETTEOUTPUT + Pixels = new BYTE[Width*Height]; + bPixelsAllocated = true; +#else if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -116,6 +120,7 @@ void FCanvasTexture::MakeTexture () Pixels = Canvas->GetBuffer(); bPixelsAllocated = false; } +#endif // Draw a special "unrendered" initial texture into the buffer. 
memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c7b62b0a6..fd14b5e0a 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -166,16 +166,18 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { dc_colormap = (lighttable_t *)translation; + dc_light = 0; } else { dc_colormap = identitymap; + dc_light = 0; } fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - BYTE *destorgsave = dc_destorg; + canvas_pixel_t *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1015,13 +1017,32 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - BYTE *spot = GetBuffer() + oldyyshifted + xx; +#ifndef PALETTEOUTPUT + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; +#else + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; DWORD bg = bg2rgb[*spot]; bg = (fg+bg) | 0x1f07c1f; *spot = RGB32k.All[bg&(bg>>15)]; +#endif } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) @@ -1069,7 +1090,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + 
canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch (); do { @@ -1079,7 +1100,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { @@ -1205,7 +1226,7 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - BYTE *dest; + canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1426,11 +1447,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const { int srcpitch = _width; int destpitch; - BYTE *dest; + canvas_pixel_t *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1442,7 +1463,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) do { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += srcpitch; dest += destpitch; } while (--_height); @@ -1452,9 +1473,9 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // V_GetBlock // Gets a linear block of pixels from the view buffer. 
// -void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const { - const BYTE *src; + const canvas_pixel_t *src; #ifdef RANGECHECK if (x<0 @@ -1470,14 +1491,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const while (_height--) { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. -bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 01a73950b..b6a626753 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -343,10 +343,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; - DWORD *bg2rgb; - DWORD fg; int gap; - BYTE *spot; + canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -366,6 +364,43 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } + spot = Buffer + x1 + y1*Pitch; + gap = Pitch - w; + +#ifndef PALETTEOUTPUT + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = 
(fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; + } +#else + DWORD *bg2rgb; + DWORD fg; + { int amount; @@ -377,8 +412,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) (((color.b * amount) >> 4) << 10); } - spot = Buffer + x1 + y1*Pitch; - gap = Pitch - w; for (y = h; y != 0; y--) { for (x = w; x != 0; x--) @@ -392,6 +425,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } spot += gap; } +#endif } //========================================================================== @@ -403,7 +437,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); @@ -759,8 +793,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height]; - memset (MemBuffer, 0, Pitch * height); + MemBuffer = new canvas_pixel_t[Pitch * height]; + memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); } //========================================================================== @@ -879,7 +913,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - BYTE *buffer = GetBuffer(); + canvas_pixel_t *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df..27c09ee36 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline BYTE *GetBuffer () const { return Buffer; } + inline canvas_pixel_t *GetBuffer () const 
{ return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; + virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. 
virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - BYTE *Buffer; + canvas_pixel_t *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - BYTE *MemBuffer; + canvas_pixel_t *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index efdced151..14a78d4cd 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -765,14 +765,20 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) +#ifndef PALETTEOUTPUT + D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; +#else + D3DFORMAT FBFormat = D3DFMT_L8; +#endif + + if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { int pow2width, pow2height, i; for (i = 1; i < Width; i <<= 1) {} pow2width = i; for (i = 1; i < Height; i <<= 1) {} pow2height = i; - if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) + if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { return false; } @@ -1304,18 +1310,18 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, 
&lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch && Pitch == Width) + if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) { - memcpy (lockrect.pBits, MemBuffer, Width * Height); + memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); } else { - BYTE *dest = (BYTE *)lockrect.pBits; - BYTE *src = MemBuffer; + canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; + canvas_pixel_t *src = MemBuffer; for (int y = 0; y < Height; y++) { - memcpy (dest, src, Width); - dest += lockrect.Pitch; + memcpy (dest, src, Width * sizeof(canvas_pixel_t)); + dest = reinterpret_cast<canvas_pixel_t *>(reinterpret_cast<BYTE *>(dest) + lockrect.Pitch); src += Pitch; } } @@ -1349,7 +1355,11 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif if (copy3d) { FBVERTEX verts[4]; @@ -1367,7 +1377,11 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_SpecialColormap]); +#else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); +#endif } } else @@ -1378,7 +1392,11 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif } //========================================================================== @@ -1707,7 +1725,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2) 
// //========================================================================== -void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1733,7 +1751,7 @@ void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_ } else { - buffer = (const BYTE *)lrect.pBits; + buffer = (const canvas_pixel_t *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 7cc603786..9be571f98 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -32,7 +32,6 @@ ** */ - // HEADER FILES ------------------------------------------------------------ #define DIRECTDRAW_VERSION 0x0300 @@ -61,7 +60,9 @@ // TYPES ------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) +#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -119,6 +120,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW + DDrawFB::DDrawFB (int width, int height, bool fullscreen) : BaseWinFB (width, height) { @@ -996,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (BYTE *)desc.lpSurface; - Pitch = desc.lPitch; + Buffer = (canvas_pixel_t *)desc.lpSurface; + Pitch = desc.lPitch / sizeof(canvas_pixel_t); BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } @@ -1327,6 +1330,7 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } +#endif ADD_STAT (blit) { diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 9b2754eae..73a2c6966 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -142,6 +142,7 @@ protected: BaseWinFB() {} }; +#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -223,6 +224,7 @@ private: DDrawFB() {} }; +#endif class D3DFB : public BaseWinFB { @@ -250,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 29bb905fb..3f3645d0b 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,8 +221,15 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); +#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + AddMode(1920, 1080, 8, 1440, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(2560, 1440, 8, 1440, 0); // 27" classic + AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k +#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); +#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); @@ -660,6 +667,10 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } +#ifndef USE_OBSOLETE_DDRAW + fb = new D3DFB(m_Adapter, width, height, fullscreen); + LOG1("New fb created @ %p\n", fb); +#else if (D3D != NULL) { fb = new D3DFB (m_Adapter, width, height, fullscreen); @@ -668,6 +679,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr { fb = new DDrawFB (width, height, fullscreen); } + LOG1 ("New fb created @ %p\n", fb); // If we could not create the framebuffer, try again with slightly @@ -729,6 +741,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); } retry = 0; +#endif fb->SetFlash (flashColor, flashAmount); return fb; From 8aabc26cd94018238ed606b81b3d49fabbe429fd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 May 2016 05:52:15 +0200 Subject: [PATCH 02/94] Created standalone rgba drawing functions --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2470 ++++++++++++++++++++++++++++++------------ src/r_draw.h | 165 ++- src/r_drawt.cpp | 332 +----- src/r_drawt_rgba.cpp | 883 +++++++++++++++ src/r_main.cpp | 4 +- src/r_plane.cpp | 11 +- src/r_plane.h | 4 + src/r_segs.cpp | 4 +- src/r_things.cpp | 97 +- src/r_things.h | 5 +- src/v_draw.cpp | 2 +- 12 files changed, 2896 insertions(+), 1082 deletions(-) create mode 100644 src/r_drawt_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84d6f06b9..c90756b5d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -883,6 +883,7 @@ set( FASTMATH_PCH_SOURCES r_bsp.cpp r_draw.cpp r_drawt.cpp + r_drawt_rgba.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 044910008..d2b694f05 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -38,6 +38,7 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" 
#include "gi.h" #include "stats.h" @@ -73,6 +74,19 @@ int scaledviewwidth; // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); void (*R_DrawColumn)(void); +void (*R_FillColumn)(void); +void (*R_FillAddColumn)(void); +void (*R_FillAddClampColumn)(void); +void (*R_FillSubClampColumn)(void); +void (*R_FillRevSubClampColumn)(void); +void (*R_DrawAddColumn)(void); +void (*R_DrawTlatedAddColumn)(void); +void (*R_DrawAddClampColumn)(void); +void (*R_DrawAddClampTranslatedColumn)(void); +void (*R_DrawSubClampColumn)(void); +void (*R_DrawSubClampTranslatedColumn)(void); +void (*R_DrawRevSubClampColumn)(void); +void (*R_DrawRevSubClampTranslatedColumn)(void); void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); @@ -82,7 +96,44 @@ void (*R_DrawSpanTranslucent)(void); void (*R_DrawSpanMaskedTranslucent)(void); void (*R_DrawSpanAddClamp)(void); void (*R_DrawSpanMaskedAddClamp)(void); -void (*rt_map4cols)(int,int,int); +void (*R_FillSpan)(void); +void (*R_FillColumnHoriz)(void); +void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapColoredPlane)(int y, int x1); +void (*R_DrawParticle)(vissprite_t *); +fixed_t (*tmvline1_add)(); +void (*tmvline4_add)(); +fixed_t (*tmvline1_addclamp)(); +void (*tmvline4_addclamp)(); +fixed_t (*tmvline1_subclamp)(); +void (*tmvline4_subclamp)(); +fixed_t (*tmvline1_revsubclamp)(); +void (*tmvline4_revsubclamp)(); +void (*rt_copy1col)(int hx, int sx, int yl, int yh); +void (*rt_copy4cols)(int sx, int yl, int yh); +void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +void (*rt_shaded4cols)(int sx, int yl, int yh); +void (*rt_map1col)(int hx, int sx, int yl, int yh); +void (*rt_add1col)(int hx, int sx, int yl, int yh); +void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +void 
(*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_map4cols)(int sx, int yl, int yh); +void (*rt_add4cols)(int sx, int yl, int yh); +void (*rt_addclamp4cols)(int sx, int yl, int yh); +void (*rt_subclamp4cols)(int sx, int yl, int yh); +void (*rt_revsubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlate4cols)(int sx, int yl, int yh); +void (*rt_tlateadd4cols)(int sx, int yl, int yh); +void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); +void (*rt_initcols)(canvas_pixel_t *buffer); // // R_DrawColumn @@ -198,10 +249,6 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - // Determine scaling, // which is the only mapping to be done. fracstep = dc_iscale; @@ -221,11 +268,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); -#else *dest = colormap[source[frac >> FRACBITS]]; -#endif dest += pitch; frac += fracstep; @@ -235,8 +278,78 @@ void R_DrawColumnP_C (void) } #endif +void R_DrawColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + + // Determine scaling, + // which is the only mapping to be done. 
+ fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + + dest += pitch; + frac += fracstep; + + } while (--count); + } +} + // [RH] Just fills a column with a color -void R_FillColumnP (void) +void R_FillColumnP_C (void) +{ + int count; + canvas_pixel_t* dest; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + { + int pitch = dc_pitch; + BYTE color = dc_color; + + do + { + *dest = color; + dest += pitch; + } while (--count); + } +} + +void R_FillColumnP_RGBA() { int count; canvas_pixel_t* dest; @@ -248,9 +361,7 @@ void R_FillColumnP (void) dest = dc_dest; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif { int pitch = dc_pitch; @@ -258,17 +369,40 @@ void R_FillColumnP (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(color, light); -#else - *dest = color; -#endif dest += pitch; } while (--count); } } -void R_FillAddColumn (void) +void R_FillAddColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = dc_destblend; + fg = dc_srccolor; + + do + { + DWORD bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg>>15)]; + dest += pitch; + } while (--count); +} + +void R_FillAddColumn_RGBA_C() { int count; canvas_pixel_t *dest; @@ -280,7 +414,6 @@ void R_FillAddColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor 
>> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -298,24 +431,9 @@ void R_FillAddColumn (void) *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); -#endif } -void R_FillAddClampColumn (void) +void R_FillAddClampColumn_C (void) { int count; canvas_pixel_t *dest; @@ -327,25 +445,6 @@ void R_FillAddClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -#else DWORD *bg2rgb; DWORD fg; @@ -365,10 +464,9 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillSubClampColumn (void) +void R_FillAddClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -380,7 +478,6 @@ void R_FillSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -391,14 +488,27 @@ void R_FillSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + 
bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -417,10 +527,9 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillRevSubClampColumn (void) +void R_FillSubClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -432,7 +541,6 @@ void R_FillRevSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -443,14 +551,27 @@ void R_FillRevSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillRevSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -469,7 +590,37 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a 
& (a>>15)]; dest += pitch; } while (--count); -#endif +} + +void R_FillRevSubClampColumn_RGBA() +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); } // @@ -542,7 +693,77 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; -#ifndef PALETTEOUTPUT + // colormap #6 is used for shading (of 0-31, a bit brighter than average) + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + BYTE *map = &NormalLight.Maps[6*256]; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. + if (fuzz) + { + cnt = MIN(FUZZTABLE-fuzz,count); + count -= cnt; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } +} +#endif + +void R_DrawFuzzColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + + // Adjust borders. Low... 
+ if (dc_yl == 0) + dc_yl = 1; + + // .. and high. + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + count = dc_yh - dc_yl; + + // Zero length. + if (count < 0) + return; + + count++; + + dest = ylookup[dc_yl] + dc_x + dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -618,58 +839,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } - -#else - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -#endif -} -#endif +} // // R_DrawTranlucentColumn @@ -733,7 +903,44 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[source[frac>>FRACBITS]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + dest += pitch; + frac += fracstep; + } while 
(--count); + } +} + +void R_DrawAddColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { const BYTE *source = dc_source; int pitch = dc_pitch; @@ -760,28 +967,6 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // @@ -803,9 +988,39 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Local copies of global vars to improve compiler optimizations + BYTE *colormap = dc_colormap; + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + *dest = colormap[translation[source[frac>>FRACBITS]]]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; @@ -821,20 +1036,54 @@ void R_DrawTranslatedColumnP_C (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); -#else - *dest = colormap[translation[source[frac>>FRACBITS]]]; -#endif dest += pitch; - frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) +void 
R_DrawTlatedAddColumnP_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTlatedAddColumnP_RGBA_C() { int count; canvas_pixel_t *dest; @@ -845,16 +1094,13 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -882,29 +1128,6 @@ void R_DrawTlatedAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Draw a column whose "color" values are actually translucency @@ -925,7 +1148,41 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + DWORD *fgstart = &Col2RGB8[0][dc_color]; + + do + { + DWORD val = 
colormap[source[frac>>FRACBITS]]; + DWORD fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawShadedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac, fracstep; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -954,26 +1211,7 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif -} +} // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () @@ -992,7 +1230,6 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -1019,30 +1256,50 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } -#else +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); do { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] 
+ bg2rgb[*dest]; - DWORD b = a; + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; frac += fracstep; } while (--count); } -#endif } // Add translated source to destination, clamping it to white @@ -1062,35 +1319,6 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1114,7 +1342,51 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + int count; + 
canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract destination from source, clamping it to black @@ -1134,7 +1406,45 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1161,29 +1471,6 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } 
-#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract destination from source, clamping it to black @@ -1203,35 +1490,6 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1254,7 +1512,51 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap 
= dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract source from destination, clamping it to black @@ -1274,7 +1576,45 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1301,29 +1641,6 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - 
{ - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract source from destination, clamping it to black @@ -1343,7 +1660,46 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1371,34 +1727,9 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } - // // R_DrawSpan // With DOOM style restrictions on view orientation, @@ -1549,15 +1880,84 @@ void 
R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + do + { + // Current texture index in u,v. + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} #endif +void R_DrawSpanP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
-#ifndef PALETTEOUTPUT #ifndef NO_SSE __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1589,14 +1989,14 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); // Next step in u,v. @@ -1604,21 +2004,16 @@ void R_DrawSpanP_C (void) } if (count == 0) return; -#endif #endif do { // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. xfrac += xstep; @@ -1638,11 +2033,7 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. 
xfrac += xstep; @@ -1651,6 +2042,8 @@ void R_DrawSpanP_C (void) } } +#ifndef X86_ASM + // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) { @@ -1664,10 +2057,6 @@ void R_DrawSpanMaskedP_C (void) int count; int spot; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); -#endif - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1689,11 +2078,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1713,11 +2098,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1727,6 +2108,71 @@ void R_DrawSpanMaskedP_C (void) } #endif +void R_DrawSpanMaskedP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + + void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -1756,7 +2202,68 @@ void R_DrawSpanTranslucentP_C (void) if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. -#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + 
dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); @@ -1779,24 +2286,9 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } else { -#ifndef PALETTEOUTPUT BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1822,23 +2314,6 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } } @@ -1879,29 +2354,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | 
(red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1921,29 +2379,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1952,7 +2393,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void R_DrawSpanAddClampP_C (void) +void R_DrawSpanMaskedTranslucentP_RGBA_C() { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1978,6 +2419,96 @@ void R_DrawSpanAddClampP_C (void) xstep = ds_xstep; ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_C (void) +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; 
+ ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1985,22 +2516,6 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[source[spot]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -2010,7 +2525,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; -#endif xfrac += xstep; yfrac += ystep; @@ -2025,7 +2539,55 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a>>15)]; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the 
most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = shade_pal_index(colormap[source[spot]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2040,17 +2602,34 @@ void R_DrawSpanAddClampP_C (void) uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; -#endif + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; xfrac += xstep; yfrac += ystep; @@ -2095,22 +2674,6 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue 
+ bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -2120,7 +2683,6 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; -#endif } dest++; xfrac += xstep; @@ -2140,7 +2702,60 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { uint32_t fg = shade_pal_index(colormap[texdata], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2155,17 +2770,39 @@ void R_DrawSpanMaskedAddClampP_C (void) uint32_t blue = (fg_blue + bg_blue + 1) / 2; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } dest++; xfrac += xstep; @@ -2175,18 +2812,19 @@ void R_DrawSpanMaskedAddClampP_C (void) } // [RH] Just fill a span with a color -void R_FillSpan (void) +void R_FillSpan_C (void) +{ + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); +} + +void R_FillSpan_RGBA() { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int 
i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); -#endif } // Draw a voxel slab @@ -2383,17 +3021,33 @@ DWORD vlinec1 () int bits = vlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif +DWORD vlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + do { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>bits]], light); -#else - *dest = colormap[source[frac >> bits]]; -#endif + *dest = shade_pal_index(colormap[source[frac >> bits]], light); frac += fracstep; dest += pitch; } while (--count); @@ -2401,6 +3055,7 @@ DWORD vlinec1 () return frac; } +#if !defined(X86_ASM) void vlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2408,23 +3063,37 @@ void vlinec4 () int bits = vlinebits; DWORD place; -#ifndef PALETTEOUTPUT + do + { + dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; + dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; + dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; + dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; + dest += dc_pitch; + } while (--count); +} +#endif + +void vlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = vlinebits; + uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); #ifndef NO_SSE - __m128i mlight_hi = _mm_set_epi16(256, light0, light0, light0, 256, light1, light1, light1); - __m128i mlight_lo = _mm_set_epi16(256, light2, light2, light2, 256, light3, light3, light3); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif #endif do { -#ifndef PALETTEOUTPUT #ifndef NO_SSE DWORD place0 = local_vplce[0]; @@ -2442,14 +3111,14 @@ void vlinec4 () local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); #else @@ -2457,17 +3126,10 @@ void vlinec4 () dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif -#else - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = 
palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; #endif dest += dc_pitch; } while (--count); -#ifndef PALETTEOUTPUT #ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -2479,9 +3141,7 @@ void vlinec4 () vince[2] = local_vince[2]; vince[3] = local_vince[3]; #endif -#endif } -#endif void setupmvline (int fracbits) { @@ -2506,20 +3166,40 @@ DWORD mvlinec1 () int bits = mvlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[pix], light); -#else *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif + +DWORD mvlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(colormap[pix], light); } frac += fracstep; dest += pitch; @@ -2528,6 +3208,7 @@ DWORD mvlinec1 () return frac; } +#if !defined(X86_ASM) void mvlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2535,33 +3216,42 @@ void mvlinec4 () int bits = mvlinebits; DWORD place; -#ifndef PALETTEOUTPUT - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); -#endif - do { BYTE pix; - -#ifndef PALETTEOUTPUT - pix 
= bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; -#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; -#endif dest += dc_pitch; } while (--count); } #endif +void mvlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = 
shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -2572,21 +3262,13 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - for (; y < y2; ++y) { int x2 = spanend[y]; int x = x1; do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); dest += dc_pitch; } @@ -2598,21 +3280,13 @@ static void R_DrawFogBoundaryLine (int y, int x) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); } -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that @@ -2703,6 +3377,133 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } } +static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + + for (; y < y2; ++y) + { + int x2 = spanend[y]; + int x = x1; + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); + dest += dc_pitch; + } +} + +static void R_DrawFogBoundaryLine_RGBA(int y, int x) +{ + int x2 = spanend[y]; + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; 
+ + uint32_t light = calc_light_multiplier(dc_light); + + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; + } + else + { + if (dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + R_DrawFogBoundaryLine_RGBA(t2++, xr); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + R_DrawFogBoundaryLine_RGBA(--b2, xr); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} + + int tmvlinebits; void setuptmvline (int bits) @@ -2710,7 +3511,40 @@ void setuptmvline (int bits) tmvlinebits = bits; } -fixed_t tmvline1_add () +fixed_t tmvline1_add_C () +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac>>bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[colormap[pix]]; + DWORD bg = bg2rgb[*dest]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2728,7 +3562,6 @@ fixed_t tmvline1_add () do { -#ifndef PALETTEOUTPUT BYTE pix = source[frac >> bits]; if (pix != 0) { @@ -2747,16 +3580,6 @@ fixed_t tmvline1_add () *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } -#else - 
BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } -#endif frac += fracstep; dest += pitch; } while (--count); @@ -2764,7 +3587,40 @@ fixed_t tmvline1_add () return frac; } -void tmvline4_add () +void tmvline4_add_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[palookupoffse[i][pix]]; + DWORD bg = bg2rgb[dest[i]]; + fg = (fg+bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_add_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2786,7 +3642,6 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2801,12 +3656,6 @@ void tmvline4_add () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2814,7 +3663,7 @@ void tmvline4_add () } while (--count); } -fixed_t tmvline1_addclamp () +fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2835,7 +3684,44 @@ fixed_t tmvline1_addclamp () BYTE pix = 
source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2850,17 +3736,6 @@ fixed_t tmvline1_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2869,7 +3744,7 @@ fixed_t tmvline1_addclamp () return frac; } -void tmvline4_addclamp () +void tmvline4_addclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2878,6 +3753,35 @@ void tmvline4_addclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void 
tmvline4_addclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -2891,7 +3795,6 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2906,17 +3809,6 @@ void tmvline4_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2924,7 +3816,7 @@ void tmvline4_addclamp () } while (--count); } -fixed_t tmvline1_subclamp () +fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2938,14 +3830,45 @@ fixed_t tmvline1_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { 
uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2960,16 +3883,6 @@ fixed_t tmvline1_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2978,7 +3891,7 @@ fixed_t tmvline1_subclamp () return frac; } -void tmvline4_subclamp () +void tmvline4_subclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2987,6 +3900,34 @@ void tmvline4_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_subclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3000,7 +3941,6 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3015,16 +3955,6 @@ void tmvline4_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - 
b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3032,7 +3962,7 @@ void tmvline4_subclamp () } while (--count); } -fixed_t tmvline1_revsubclamp () +fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3046,14 +3976,45 @@ fixed_t tmvline1_revsubclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3068,16 +4029,6 @@ fixed_t tmvline1_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -3086,7 +4037,38 @@ fixed_t tmvline1_revsubclamp () return frac; } -void tmvline4_revsubclamp () +void tmvline4_revsubclamp_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = 
tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_revsubclamp_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -3108,7 +4090,6 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3123,16 +4104,6 @@ void tmvline4_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3164,6 +4135,85 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { +#ifndef PALETTEOUTPUT + + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = 
R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + 
rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; + +#else + #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; @@ -3194,6 +4244,72 @@ void R_InitColumnDrawers () R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; + + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; + + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; 
+ tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; + +#ifdef X86_ASM + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; +#else + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; +#endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + +#endif } // [RH] Choose column drawers in a single place @@ -3211,7 +4327,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillColumnP; + colfunc = R_FillColumn; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; } @@ -3261,13 +4377,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = 
R_DrawAddColumnP_C; + colfunc = R_DrawAddColumn; hcolfunc_post1 = rt_add1col; hcolfunc_post4 = rt_add4cols; } else { - colfunc = R_DrawTlatedAddColumnP_C; + colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; } @@ -3282,13 +4398,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawAddClampColumnP_C; + colfunc = R_DrawAddClampColumn; hcolfunc_post1 = rt_addclamp1col; hcolfunc_post4 = rt_addclamp4cols; } else { - colfunc = R_DrawAddClampTranslatedColumnP_C; + colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; } @@ -3304,13 +4420,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawSubClampColumnP_C; + colfunc = R_DrawSubClampColumn; hcolfunc_post1 = rt_subclamp1col; hcolfunc_post4 = rt_subclamp4cols; } else { - colfunc = R_DrawSubClampTranslatedColumnP_C; + colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; } @@ -3329,13 +4445,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawRevSubClampColumnP_C; + colfunc = R_DrawRevSubClampColumn; hcolfunc_post1 = rt_revsubclamp1col; hcolfunc_post4 = rt_revsubclamp4cols; } else { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; + colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; } @@ -3440,7 +4556,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; dc_colormap = identitymap; dc_light = 0; } @@ -3459,25 +4575,25 @@ void R_FinishSetPatchStyle () bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) { - if (colfunc == R_DrawAddColumnP_C) + if (colfunc == R_DrawAddColumn) { *tmvline1 = tmvline1_add; *tmvline4 = tmvline4_add; return true; } - if (colfunc == R_DrawAddClampColumnP_C) + if (colfunc == R_DrawAddClampColumn) { *tmvline1 = tmvline1_addclamp; *tmvline4 = tmvline4_addclamp; return true; } - if (colfunc == R_DrawSubClampColumnP_C) + if (colfunc == R_DrawSubClampColumn) { *tmvline1 = tmvline1_subclamp; *tmvline4 = tmvline4_subclamp; return true; } - if (colfunc == R_DrawRevSubClampColumnP_C) + if (colfunc == R_DrawRevSubClampColumn) { *tmvline1 = tmvline1_revsubclamp; *tmvline4 = tmvline4_revsubclamp; diff --git a/src/r_draw.h b/src/r_draw.h index 6f7a91154..17698c360 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -127,33 +127,33 @@ extern "C" void rt_copy1col_c (int hx, int sx, int yl, int yh); void rt_copy4cols_c (int sx, int yl, int yh); -void rt_shaded1col (int hx, int sx, int yl, int yh); +void rt_shaded1col_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_c (int sx, int yl, int yh); void rt_shaded4cols_asm (int sx, int yl, int yh); void rt_map1col_c (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); +void rt_add1col_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh); -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int yl, 
int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); +void rt_tlate1col_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh); void rt_map4cols_c (int sx, int yl, int yh); void rt_add4cols_c (int sx, int yl, int yh); void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); +void rt_subclamp4cols_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_c (int sx, int yl, int yh); -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); +void rt_tlate4cols_c (int sx, int yl, int yh); +void rt_tlateadd4cols_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh); void rt_copy1col_asm (int hx, int sx, int yl, int yh); void rt_map1col_asm (int hx, int sx, int yl, int yh); @@ -163,32 +163,83 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); + +/// + +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA_c (int sx, int yl, int yh); + +void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); + +void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); 
+void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); + +void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); + } -extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); +extern void (*rt_copy4cols)(int sx, int yl, int yh); -#ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define rt_map1col rt_map1col_asm -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_copy1col rt_copy1col_c -#define rt_copy4cols rt_copy4cols_c -#define rt_map1col rt_map1col_c -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif +extern void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +extern void (*rt_shaded4cols)(int sx, int yl, int yh); + +extern void (*rt_map1col)(int hx, int sx, int yl, int 
yh); +extern void (*rt_add1col)(int hx, int sx, int yl, int yh); +extern void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_add4cols)(int sx, int yl, int yh); +extern void (*rt_addclamp4cols)(int sx, int yl, int yh); +extern void (*rt_subclamp4cols)(int sx, int yl, int yh); +extern void (*rt_revsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_tlate4cols)(int sx, int yl, int yh); +extern void (*rt_tlateadd4cols)(int sx, int yl, int yh); +extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_initcols)(canvas_pixel_t *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (canvas_pixel_t *buffer=NULL); +void rt_initcols_pal (canvas_pixel_t *buffer); +void rt_initcols_rgba (canvas_pixel_t *buffer); -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); + +extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); + +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM @@ -212,6 +263,14 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +void R_DrawColumnHorizP_RGBA_C (void); +void R_DrawColumnP_RGBA_C (void); +void R_DrawFuzzColumnP_RGBA_C (void); +void R_DrawTranslatedColumnP_RGBA_C (void); +void R_DrawShadedColumnP_RGBA_C (void); +void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanMaskedP_RGBA_C (void); + #endif void R_DrawSpanTranslucentP_C (void); @@ -220,9 +279,30 @@ void R_DrawSpanMaskedTranslucentP_C (void); void R_DrawTlatedLucentColumnP_C (void); #define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); +extern void(*R_FillColumn)(void); +extern void(*R_FillAddColumn)(void); +extern void(*R_FillAddClampColumn)(void); +extern void(*R_FillSubClampColumn)(void); +extern void(*R_FillRevSubClampColumn)(void); +extern void(*R_DrawAddColumn)(void); +extern void(*R_DrawTlatedAddColumn)(void); +extern void(*R_DrawAddClampColumn)(void); +extern void(*R_DrawAddClampTranslatedColumn)(void); +extern void(*R_DrawSubClampColumn)(void); +extern void(*R_DrawSubClampTranslatedColumn)(void); +extern void(*R_DrawRevSubClampColumn)(void); +extern void(*R_DrawRevSubClampTranslatedColumn)(void); + +extern void(*R_FillSpan)(void); +extern void(*R_FillColumnHoriz)(void); + +void R_FillColumnP_C (void); + +void R_FillColumnHorizP_C (void); +void R_FillSpan_C (void); + +void R_FillColumnHorizP_RGBA_C(void); +void R_FillSpan_RGBA_C(void); #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA @@ -282,6 +362,15 @@ inline ESPSResult 
R_SetPatchStyle(FRenderStyle style, float alpha, int translati // style was STYLE_Shade void R_FinishSetPatchStyle (); +extern fixed_t(*tmvline1_add)(); +extern void(*tmvline4_add)(); +extern fixed_t(*tmvline1_addclamp)(); +extern void(*tmvline4_addclamp)(); +extern fixed_t(*tmvline1_subclamp)(); +extern void(*tmvline4_subclamp)(); +extern fixed_t(*tmvline1_revsubclamp)(); +extern void(*tmvline4_revsubclamp)(); + // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index f5fc027b5..9520f59b3 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -114,13 +114,6 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. void rt_copy4cols_c (int sx, int yl, int yh) { -#ifndef PALETTEOUTPUT - // To do: we could do this with SSE using __m128i - rt_copy1col_c(0, sx, yl, yh); - rt_copy1col_c(1, sx + 1, yl, yh); - rt_copy1col_c(2, sx + 2, yl, yh); - rt_copy1col_c(3, sx + 3, yl, yh); -#else int *source; int *dest; int count; @@ -149,7 +142,6 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); -#endif } // Maps one span at hx to the screen at sx. 
@@ -166,21 +158,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[*source], light); -#else *dest = colormap[*source]; -#endif source += 4; dest += pitch; } @@ -188,13 +172,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); -#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -214,27 +193,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; -#endif source += 4; dest += pitch; } @@ -242,16 +210,6 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = 
shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -260,7 +218,6 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -356,21 +313,21 @@ void rt_Translate4cols(const BYTE *translation, int yl, int yh) } // Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) +void rt_tlate1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols (int sx, int yl, int yh) +void rt_tlate4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. 
-void rt_add1col (int hx, int sx, int yl, int yh) +void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -388,29 +345,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -424,7 +358,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx without clamping. 
@@ -446,32 +379,6 @@ void rt_add4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -508,25 +415,24 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols (int sx, int yl, int yh) +void rt_tlateadd4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. 
-void rt_shaded1col (int hx, int sx, int yl, int yh) +void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -544,29 +450,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -578,7 +461,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Shades all four spans to the screen starting at sx. 
@@ -600,32 +482,6 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source = &dc_temp[yl*4]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -651,11 +507,10 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds one span at hx to the screen at sx with clamping. 
-void rt_addclamp1col (int hx, int sx, int yl, int yh) +void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -673,28 +528,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -711,7 +544,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx with clamping. 
@@ -733,31 +565,6 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -802,25 +609,24 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_addclamp1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col (int hx, int sx, int yl, int yh) +void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -838,28 +644,6 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -874,11 +658,10 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
-void rt_subclamp4cols (int sx, int yl, int yh) +void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -896,32 +679,6 @@ void rt_subclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -961,25 +718,24 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_subclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_subclamp4cols(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. 
-void rt_revsubclamp1col (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -999,28 +755,6 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -1033,11 +767,10 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols (int sx, int yl, int yh) +void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -1057,32 +790,6 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -1120,18 +827,17 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_revsubclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_revsubclamp4cols(sx, yl, yh); @@ -1301,7 +1007,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (canvas_pixel_t *buff) +void rt_initcols_pal (canvas_pixel_t *buff) { int y; @@ -1372,7 +1078,7 @@ void R_DrawColumnHorizP_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP (void) +void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp new file mode 100644 index 000000000..e8111be8f --- /dev/null +++ b/src/r_drawt_rgba.cpp @@ -0,0 +1,883 @@ +/* +** r_drawt_rgba.cpp +** Faster column drawers for modern processors, true color edition +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** True color versions of the similar functions in r_drawt.cpp +** Please see r_drawt.cpp for a description of the globals used. +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" + +canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp_rgba; + +// Defined in r_draw_t.cpp: +extern unsigned int dc_tspans[4][MAXHEIGHT]; +extern unsigned int *dc_ctspan[4]; +extern unsigned int *horizspan[4]; + +// Copies one span at hx to the screen at sx. +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); +} + +// Copies all four spans to the screen starting at sx. 
+void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +{ + // To do: we could do this with SSE using __m128i + rt_copy1col_RGBA_c(0, sx, yl, yh); + rt_copy1col_RGBA_c(1, sx + 1, yl, yh); + rt_copy1col_RGBA_c(2, sx + 2, yl, yh); + rt_copy1col_RGBA_c(3, sx + 3, yl, yh); +} + +// Maps one span at hx to the screen at sx. +void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(colormap[*source], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +// Maps all four spans to the screen starting at sx. 
+void rt_map4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. 
GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } +} + +void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } +} + +// Translates one span at hx to the screen at sx. 
+void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh)
+{
+	rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh);
+	rt_map1col_RGBA_c(hx, sx, yl, yh); // direct call: must not depend on what the rt_map1col pointer currently holds
+}
+
+// Translates all four spans to the screen starting at sx.
+void rt_tlate4cols_RGBA_c (int sx, int yl, int yh)
+{
+	rt_Translate4cols_RGBA_c(dc_translation, yl, yh);
+	rt_map4cols_RGBA_c(sx, yl, yh); // direct call, same reason as in rt_tlate1col_RGBA_c
+}
+
+// Adds one span at hx to the screen at sx without clamping.
+void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh)
+{
+	BYTE *colormap;
+	canvas_pixel_t *source;
+	canvas_pixel_t *dest;
+	int count;
+	int pitch;
+
+	count = yh-yl;
+	if (count < 0)
+		return;
+	count++; // yl..yh is an inclusive row range
+
+	dest = ylookup[yl] + sx + dc_destorg;
+	source = &dc_temp_rgba[yl*4 + hx]; // temp buffer holds four interleaved columns per row
+	pitch = dc_pitch;
+	colormap = dc_colormap;
+
+	uint32_t light = calc_light_multiplier(dc_light);
+
+	do {
+		uint32_t fg = shade_pal_index(colormap[*source], light);
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		uint32_t bg_red = (*dest >> 16) & 0xff;
+		uint32_t bg_green = (*dest >> 8) & 0xff;
+		uint32_t bg_blue = (*dest) & 0xff;
+
+		uint32_t red = clamp(fg_red + bg_red, 0, 255); // each channel sum is limited to 255 here
+		uint32_t green = clamp(fg_green + bg_green, 0, 255);
+		uint32_t blue = clamp(fg_blue + bg_blue, 0, 255);
+
+		*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+		source += 4; // next row of the same temp column
+		dest += pitch;
+	} while (--count);
+}
+
+// Adds all four spans to the screen starting at sx without clamping.
+void rt_add4cols_RGBA_c (int sx, int yl, int yh)
+{
+	BYTE *colormap;
+	canvas_pixel_t *source;
+	canvas_pixel_t *dest;
+	int count;
+	int pitch;
+
+	count = yh-yl;
+	if (count < 0)
+		return;
+	count++; // yl..yh is an inclusive row range
+
+	dest = ylookup[yl] + sx + dc_destorg;
+	source = &dc_temp_rgba[yl*4]; // four interleaved temp columns per row, starting at column 0
+	pitch = dc_pitch;
+	colormap = dc_colormap;
+
+	uint32_t light = calc_light_multiplier(dc_light);
+
+	do {
+		for (int i = 0; i < 4; i++)
+		{
+			uint32_t fg = shade_pal_index(colormap[source[i]], light);
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (dest[i] >> 16) & 0xff;
+			uint32_t bg_green = (dest[i] >> 8) & 0xff;
+			uint32_t bg_blue = (dest[i]) & 0xff;
+
+			uint32_t red = clamp(fg_red + bg_red, 0, 255); // each channel sum is limited to 255 here
+			uint32_t green = clamp(fg_green + bg_green, 0, 255);
+			uint32_t blue = clamp(fg_blue + bg_blue, 0, 255);
+
+			dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue;
+		}
+
+		source += 4;
+		dest += pitch;
+	} while (--count);
+}
+
+// Translates and adds one span at hx to the screen at sx without clamping.
+void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh)
+{
+	rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh);
+	rt_add1col_RGBA_c(hx, sx, yl, yh); // direct call: must not depend on what the rt_add1col pointer currently holds
+}
+
+// Translates and adds all four spans to the screen starting at sx without clamping.
+void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh)
+{
+	rt_Translate4cols_RGBA_c(dc_translation, yl, yh);
+	rt_add4cols_RGBA_c(sx, yl, yh); // direct call, same reason as in rt_tlateadd1col_RGBA_c
+}
+
+// Shades one span at hx to the screen at sx.
+void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Shades all four spans to the screen starting at sx. 
+void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +} + +// Adds one span at hx to the screen at sx with clamping. 
+void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Adds all four spans to the screen starting at sx with clamping. 
+void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh)
+{
+	BYTE *colormap;
+	canvas_pixel_t *source;
+	canvas_pixel_t *dest;
+	int count;
+	int pitch;
+
+	count = yh-yl;
+	if (count < 0)
+		return;
+	count++; // yl..yh is an inclusive row range
+
+	dest = ylookup[yl] + sx + dc_destorg;
+	source = &dc_temp_rgba[yl*4]; // four interleaved temp columns per row, starting at column 0
+	pitch = dc_pitch;
+	colormap = dc_colormap;
+
+	uint32_t light = calc_light_multiplier(dc_light);
+
+	do {
+		for (int i = 0; i < 4; i++)
+		{
+			uint32_t fg = shade_pal_index(colormap[source[i]], light);
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (dest[i] >> 16) & 0xff;
+			uint32_t bg_green = (dest[i] >> 8) & 0xff;
+			uint32_t bg_blue = (dest[i]) & 0xff;
+
+			uint32_t red = clamp(fg_red + bg_red, 0, 255); // saturating add per channel
+			uint32_t green = clamp(fg_green + bg_green, 0, 255);
+			uint32_t blue = clamp(fg_blue + bg_blue, 0, 255);
+
+			dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue;
+		}
+		source += 4;
+		dest += pitch;
+	} while (--count);
+}
+
+// Translates and adds one span at hx to the screen at sx with clamping.
+void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh)
+{
+	rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh);
+	rt_addclamp1col_RGBA_c(hx, sx, yl, yh);
+}
+
+// Translates and adds all four spans to the screen starting at sx with clamping.
+void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh)
+{
+	rt_Translate4cols_RGBA_c(dc_translation, yl, yh);
+	rt_addclamp4cols_RGBA_c(sx, yl, yh); // direct call, matching rt_tlateaddclamp1col_RGBA_c above
+}
+
+// Subtracts one span at hx to the screen at sx with clamping.
+void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Subtracts all four spans to the screen starting at sx with clamping. 
+void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx to the screen at sx with clamping. +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans to the screen starting at sx with clamping. +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_subclamp4cols_RGBA_c(sx, yl, yh); +} + +// Subtracts one span at hx from the screen at sx with clamping. 
+void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh)
+{
+	BYTE *colormap;
+	canvas_pixel_t *source;
+	canvas_pixel_t *dest;
+	int count;
+	int pitch;
+
+	count = yh-yl;
+	if (count < 0)
+		return;
+	count++; // yl..yh is an inclusive row range
+
+	DWORD *fg2rgb = dc_srcblend; // NOTE(review): never referenced below in this function
+	DWORD *bg2rgb = dc_destblend; // NOTE(review): never referenced below in this function
+	dest = ylookup[yl] + sx + dc_destorg;
+	source = &dc_temp_rgba[yl*4 + hx]; // temp buffer holds four interleaved columns per row
+	pitch = dc_pitch;
+	colormap = dc_colormap;
+
+	uint32_t light = calc_light_multiplier(dc_light);
+
+	do {
+		uint32_t fg = shade_pal_index(colormap[*source], light);
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		uint32_t bg_red = (*dest >> 16) & 0xff;
+		uint32_t bg_green = (*dest >> 8) & 0xff;
+		uint32_t bg_blue = (*dest) & 0xff;
+
+		uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; // 256 bias keeps the unsigned difference from wrapping; net effect is fg - bg floored at 0
+		uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256;
+		uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256;
+
+		*dest = 0xff000000 | (red << 16) | (green << 8) | blue; // alpha byte is forced to 0xff
+		source += 4; // next row of the same temp column
+		dest += pitch;
+	} while (--count);
+}
+
+// Subtracts all four spans from the screen starting at sx with clamping.
+void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx from the screen at sx with clamping. +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans from the screen starting at sx with clamping. +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA_c(sx, yl, yh); +} + +// Before each pass through a rendering loop that uses these routines, +// call this function to set up the span pointers. +void rt_initcols_rgba (canvas_pixel_t *buff) +{ + int y; + + dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + for (y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; +} + +// Stretches a column into a temporary buffer which is later +// drawn to the screen along with up to three other columns. +void R_DrawColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + canvas_pixel_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span; + + span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = &dc_temp_rgba[x + 4*dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest[16]= source[frac>>FRACBITS]; frac += fracstep; + dest[20]= source[frac>>FRACBITS]; frac += fracstep; + dest[24]= source[frac>>FRACBITS]; frac += fracstep; + dest[28]= source[frac>>FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } +} + +// [RH] Just fills a column with a given color +void R_FillColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + BYTE color = dc_color; + canvas_pixel_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = 
&dc_temp_rgba[x + 4*dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); +} diff --git a/src/r_main.cpp b/src/r_main.cpp index 04e798981..b7723d07d 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -847,10 +847,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else diff --git a/src/r_plane.cpp b/src/r_plane.cpp index b385302e5..c8258a1ba 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -491,18 +491,19 @@ void R_MapTiltedPlane (int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane_C (int y, int x1) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); +} + +void R_MapColoredPlane_RGBA(int y, int x1) { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); -#endif } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09..ac63501e3 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,6 +93,10 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int 
x1)); +extern void(*R_MapColoredPlane)(int y, int x1); +void R_MapColoredPlane_C(int y, int x1); +void R_MapColoredPlane_RGBA(int y, int x1); + visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1cdb78555..fb27a99de 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -463,7 +463,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(); + rt_initcols(nullptr); BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; @@ -3319,7 +3319,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_light = 0; #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (R_DrawMaskedColumnHoriz); diff --git a/src/r_things.cpp b/src/r_things.cpp index 0e55b45f9..a6f6aea28 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -470,7 +470,7 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); @@ -619,7 +619,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_light = FLOAT2FIXED(MAXLIGHTVIS); #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { if (!R_ClipSpriteColumnWithPortals(spr)) @@ -681,7 +681,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -2617,7 +2617,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { int spacing; canvas_pixel_t *dest; @@ -2629,44 +2629,6 @@ void R_DrawParticle (vissprite_t *vis) 
R_DrawMaskedSegsBehindParticle (vis); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1 + countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } - } -#else DWORD *bg2rgb; DWORD fg; @@ -2719,7 +2681,56 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } -#endif +} + +void R_DrawParticle_RGBA(vissprite_t *vis) +{ + int spacing; + canvas_pixel_t *dest; + BYTE color = vis->Style.colormap[vis->startfrac]; + int yl = vis->y1; + int ycount = vis->y2 - yl + 1; + int x1 = vis->x1; + int countbase = vis->x2 - x1; + + R_DrawMaskedSegsBehindParticle(vis); + + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = RenderTarget->GetPitch(); + + 
for (int x = x1; x < (x1 + countbase); x++)
+	{
+		dc_x = x;
+		if (R_ClipSpriteColumnWithPortals(vis))
+			continue;
+		dest = ylookup[yl] + x + dc_destorg;
+		for (int y = 0; y < ycount; y++)
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			uint32_t red = (fg_red + bg_red * inv_alpha) / 256; // fg_* were pre-multiplied by alpha before the loop, so the background takes the complementary weight (alpha + inv_alpha == 256)
+			uint32_t green = (fg_green + bg_green * inv_alpha) / 256;
+			uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += spacing;
+		}
+	}
 }
 
 extern double BaseYaspectMul;;
diff --git a/src/r_things.h b/src/r_things.h
index 1cf9b0200..057b7cfe2 100644
--- a/src/r_things.h
+++ b/src/r_things.h
@@ -97,7 +97,10 @@ struct vissprite_t
 
 struct particle_t;
 
-void R_DrawParticle (vissprite_t *);
+extern void(*R_DrawParticle)(vissprite_t *);
+void R_DrawParticle_C (vissprite_t *);
+void R_DrawParticle_RGBA (vissprite_t *);
+
 void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside);
 
 extern int MaxVisSprites;
diff --git a/src/v_draw.cpp b/src/v_draw.cpp
index fd14b5e0a..8853fc947 100644
--- a/src/v_draw.cpp
+++ b/src/v_draw.cpp
@@ -300,7 +300,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
 
 		while (dc_x < stop4)
 		{
-			rt_initcols();
+			rt_initcols(nullptr);
 			for (int zz = 4; zz; --zz)
 			{
 				pixels = img->GetColumn(frac >> FRACBITS, spanptr);

From 7080180d478ae7158e4f2b8d1821089105764c8f Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Mon, 30 May 2016 13:32:24 +0200
Subject: [PATCH 03/94] Added menu option for toggling true color output on and off

---
 src/r_draw.cpp             | 327 ++++++++++++++++++-------------------
 src/r_main.cpp             |   1 +
 src/win32/fb_d3d9.cpp      |  79 +++++----
 src/win32/win32iface.h     |   1 +
 wadsrc/static/language.enu |   1 +
 wadsrc/static/menudef.txt  |   1 +
 6 files changed, 216 insertions(+), 194 deletions(-)

diff --git a/src/r_draw.cpp b/src/r_draw.cpp
index d2b694f05..83a4472f3 100644
--- a/src/r_draw.cpp
+++ 
b/src/r_draw.cpp @@ -178,7 +178,7 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) - +EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4135,181 +4135,180 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { -#ifndef PALETTEOUTPUT - - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; - - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; - - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = R_DrawParticle_RGBA; - - 
tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; - rt_initcols = rt_initcols_rgba; - - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; - domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; - -#else - -#ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 5) + if (r_swtruecolor) 
{ - rt_map4cols = rt_map4cols_asm2; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + tmvline4_revsubclamp = 
tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; } else { - rt_map4cols = rt_map4cols_asm1; - } +#ifdef X86_ASM + R_DrawColumn = R_DrawColumnP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_ASM; + R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; + if (CPU.Family <= 5) + { + rt_map4cols = rt_map4cols_asm2; + } + else + { + rt_map4cols = rt_map4cols_asm1; + } #else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawColumn = R_DrawColumnP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; 
- R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols_c; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; + R_DrawColumn = R_DrawColumnP_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_C; + rt_map4cols = rt_map4cols_c; #endif - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; - R_FillColumn = R_FillColumnP_C; - R_FillAddColumn = R_FillAddColumn_C; - R_FillAddClampColumn = R_FillAddClampColumn_C; - R_FillSubClampColumn = R_FillSubClampColumn_C; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; - R_DrawAddColumn = R_DrawAddColumnP_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; - R_FillSpan = R_FillSpan_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_FillColumnHoriz = R_FillColumnHorizP_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; + 
R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; - tmvline1_add = tmvline1_add_C; - tmvline4_add = tmvline4_add_C; - tmvline1_addclamp = tmvline1_addclamp_C; - tmvline4_addclamp = tmvline4_addclamp_C; - tmvline1_subclamp = tmvline1_subclamp_C; - tmvline4_subclamp = tmvline4_subclamp_C; - tmvline1_revsubclamp = tmvline1_revsubclamp_C; - tmvline4_revsubclamp = tmvline4_revsubclamp_C; + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; + tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; #ifdef X86_ASM - rt_copy1col = rt_copy1col_asm; - rt_copy4cols = rt_copy4cols_asm; - rt_map1col = rt_map1col_asm; - rt_shaded4cols = rt_shaded4cols_asm; - rt_add4cols = rt_add4cols_asm; - rt_addclamp4cols = rt_addclamp4cols_asm; + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; #else - rt_copy1col = rt_copy1col_c; - rt_copy4cols = rt_copy4cols_c; - rt_map1col = rt_map1col_c; - rt_shaded4cols = rt_shaded4cols_c; - rt_add4cols = 
rt_add4cols_c; - rt_addclamp4cols = rt_addclamp4cols_c; -#endif - rt_shaded1col = rt_shaded1col_c; - rt_add1col = rt_add1col_c; - rt_addclamp1col = rt_addclamp1col_c; - rt_subclamp1col = rt_subclamp1col_c; - rt_revsubclamp1col = rt_revsubclamp1col_c; - rt_tlate1col = rt_tlate1col_c; - rt_tlateadd1col = rt_tlateadd1col_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = rt_map4cols_c; - rt_subclamp4cols = rt_subclamp4cols_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_c; - rt_tlate4cols = rt_tlate4cols_c; - rt_tlateadd4cols = rt_tlateadd4cols_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; - rt_initcols = rt_initcols_pal; - + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; #endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + } } // [RH] Choose column drawers in a single place diff --git a/src/r_main.cpp b/src/r_main.cpp index b7723d07d..d85cd62a0 
100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,6 +103,7 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) +CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) double r_BaseVisibility; double r_WallVisibility; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 14a78d4cd..0cc9045ee 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,6 +187,7 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) +EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -765,11 +766,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { -#ifndef PALETTEOUTPUT - D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; -#else - D3DFORMAT FBFormat = D3DFMT_L8; -#endif + FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1310,20 +1307,45 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) + if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) { - memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); + if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height * sizeof(uint32_t)); + } + else + { + uint32_t *dest = (uint32_t *)lockrect.pBits; + uint32_t *src = MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width * sizeof(uint32_t)); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } + } + else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + { + if (lockrect.Pitch == Pitch && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height); + } + else 
+ { + BYTE *dest = (BYTE *)lockrect.pBits; + BYTE *src = (BYTE *)MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } } else { - canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; - canvas_pixel_t *src = MemBuffer; - for (int y = 0; y < Height; y++) - { - memcpy (dest, src, Width * sizeof(canvas_pixel_t)); - dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); - src += Pitch; - } + memset(lockrect.pBits, 0, lockrect.Pitch * Height); } FBTexture->UnlockRect (0); } @@ -1355,11 +1377,10 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); if (copy3d) { FBVERTEX verts[4]; @@ -1377,11 +1398,10 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_SpecialColormap]); -#else - SetPixelShader(Shaders[SHADER_SpecialColormapPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_SpecialColormap]); + else + SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } else @@ -1392,11 +1412,10 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + 
SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); } //========================================================================== diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 73a2c6966..d26765100 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -424,6 +424,7 @@ private: bool NeedPalUpdate; bool NeedGammaUpdate; int FBWidth, FBHeight; + D3DFORMAT FBFormat; bool VSync; RECT BlendingRect; int In2D; diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 62761a417..001172185 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1780,6 +1780,7 @@ DSPLYMNU_BRIGHTNESS = "Brightness"; DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; +DSPLYMNU_TRUECOLOR = "True color output"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index ff395ff2a..93e33ce79 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -661,6 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" + Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 20b7743ec39088186e49142146d40c43e0cccae5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 01:49:39 +0200 Subject: [PATCH 04/94] Added R_SetColorMapLight and R_SetDSColorMapLight --- src/f_wipe.cpp | 37 +++++++------ src/r_draw.cpp | 27 ++++++++++ src/r_draw.h | 6 +++ src/r_main.h | 4 -- src/r_plane.cpp | 9 +--- src/r_segs.cpp | 90 ++++++++++--------------------- src/r_things.cpp | 24 ++------- src/textures/canvastexture.cpp | 9 ++-- 
src/v_draw.cpp | 49 +++++++++-------- src/v_video.cpp | 98 ++++++++++++++++++---------------- 10 files changed, 166 insertions(+), 187 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index c6f20cadb..a86f93fc4 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,6 +33,8 @@ // SCREEN WIPE PACKAGE // +EXTERN_CVAR(Bool, r_swtruecolor) + static int CurrentWipeType; static short *wipe_scr_start; @@ -77,10 +79,8 @@ bool wipe_initMelt (int ticks) { int i, r; -#ifdef PALETTEOUTPUT // copy start screen to main screen - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -301,9 +301,6 @@ bool wipe_doBurn (int ticks) } else { -#ifndef PALETTEOUTPUT - // TO DO: RGB32k.All -#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -311,7 +308,6 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; -#endif done = false; } } @@ -342,9 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { -#ifdef PALETTEOUTPUT - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); -#endif + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); return true; } else @@ -391,14 +385,15 @@ static bool (*wipes[])(int) = // Returns true if the wipe should be performed. 
bool wipe_StartScreen (int type) { + if (r_swtruecolor) + return false; + CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); return true; } return false; @@ -406,13 +401,15 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { + if (r_swtruecolor) + return; + if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. 
+ // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } @@ -423,6 +420,9 @@ bool wipe_ScreenWipe (int ticks) { bool rc; + if (r_swtruecolor) + return true; + if (CurrentWipeType == wipe_None) return true; @@ -436,6 +436,9 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { + if (r_swtruecolor) + return; + if (wipe_scr_start != NULL) { delete[] wipe_scr_start; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83a4472f3..cd34a71b4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4601,3 +4601,30 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } +void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + ds_colormap = basecolormapdata; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_light = 0; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index 17698c360..db109dbee 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -386,4 +386,10 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ // transmaskwallscan is like maskwallscan, but it can also blend to the background void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); +// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) +void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); + +// Same as R_SetColorMapLight, but for ds_colormap and ds_light +void R_SetDSColorMapLight(BYTE *base_colormap, float 
light, int shade); + #endif diff --git a/src/r_main.h b/src/r_main.h index 37a41a763..c1034ea3e 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -86,8 +86,6 @@ extern bool r_dontmaplines; // This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap #define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) -#ifndef PALETTEOUTPUT - // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { @@ -108,8 +106,6 @@ inline uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } -#endif - extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c8258a1ba..8d0c882ba 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,14 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. -#ifndef PALETTEOUTPUT - ds_colormap = basecolormap->Maps; - ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); -#else - ds_colormap = basecolormap->Maps + (GETPALOOKUP ( - GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); - ds_light = 0; -#endif + R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM diff --git a/src/r_segs.cpp b/src/r_segs.cpp index fb27a99de..548cd994f 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,6 +61,8 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) +EXTERN_CVAR(Bool, r_swtruecolor) + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -1138,13 +1140,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << 
COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1184,13 +1180,16 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(12/*light*/, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1245,13 +1244,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,13 +1683,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1734,12 +1721,15 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - 
palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } } } @@ -1795,13 +1785,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -3295,13 +3279,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3311,13 +3289,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3332,13 +3304,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef 
PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_things.cpp b/src/r_things.cpp index a6f6aea28..22538bd40 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -594,13 +594,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -611,13 +605,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -633,13 +621,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7388c1306..d1f70439f 100644 --- 
a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,10 +106,7 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); -#ifndef PALETTEOUTPUT - Pixels = new BYTE[Width*Height]; - bPixelsAllocated = true; -#else + if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -117,10 +114,10 @@ void FCanvasTexture::MakeTexture () } else { - Pixels = Canvas->GetBuffer(); + Pixels = (BYTE*)Canvas->GetBuffer(); bPixelsAllocated = false; } -#endif + // Draw a special "unrendered" initial texture into the buffer. memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8853fc947..984375f25 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,6 +77,8 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); +EXTERN_CVAR(Bool, r_swtruecolor) + // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. 
static int LastPal = -1; @@ -1017,32 +1019,35 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } -#ifndef PALETTEOUTPUT - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + if (r_swtruecolor) + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; -#endif + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + } + else + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + DWORD *bg2rgb = Col2RGB8[1+level]; + DWORD *fg2rgb = Col2RGB8[63-level]; + DWORD fg = fg2rgb[basecolor]; + DWORD bg = bg2rgb[*spot]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + } } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) diff --git a/src/v_video.cpp 
b/src/v_video.cpp index b6a626753..2fb46e88a 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,6 +65,7 @@ #include "menu/menu.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_swtruecolor) FRenderer *Renderer; @@ -367,65 +368,68 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; -#ifndef PALETTEOUTPUT - uint32_t fg = color.d; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - for (y = h; y != 0; y--) + if (r_swtruecolor) { - for (x = w; x != 0; x--) + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) { - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; - spot++; + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; } - spot += gap; } -#else - DWORD *bg2rgb; - DWORD fg; - + else { - int amount; + DWORD *bg2rgb; + DWORD fg; 
- amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64-amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } - - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) { - DWORD bg; + int amount; - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; - spot++; + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64-amount]; + + fg = (((color.r * amount) >> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + DWORD bg; + + bg = bg2rgb[(*spot)&0xff]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + spot++; + } + spot += gap; } - spot += gap; } -#endif } //========================================================================== From 045bad1b5287d75f2c9f3d84e4a6cc2975499c18 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 05:31:32 +0200 Subject: [PATCH 05/94] Removed the need for the pixel_canvas_t typedef --- src/basictypes.h | 6 -- src/f_wipe.cpp | 14 +-- src/m_misc.cpp | 2 - src/r_draw.cpp | 234 ++++++++++++++++++++--------------------- src/r_draw.h | 12 +-- src/r_drawt.cpp | 71 +++++++------ src/r_drawt_rgba.cpp | 94 ++++++++--------- src/r_main.cpp | 34 ++++-- src/r_plane.cpp | 130 ++++++++++++++++++++++- src/r_plane.h | 4 + src/r_segs.cpp | 93 ++++++++++------ src/r_things.cpp | 23 ++-- src/v_draw.cpp | 112 +++++++++++++++----- src/v_video.cpp | 35 ++++-- src/v_video.h | 14 +-- src/win32/fb_d3d9.cpp | 6 +- src/win32/fb_ddraw.cpp | 4 +- src/win32/win32iface.h | 2 +- 18 files changed, 561 insertions(+), 329 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index 45e33a4a7..ff2cd972e 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,12 +66,6 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer -#ifndef PALETTEOUTPUT -typedef uint32_t canvas_pixel_t; -#else 
-typedef BYTE canvas_pixel_t; -#endif - #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a86f93fc4..7e1ec678e 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -80,7 +80,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -273,7 +273,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - canvas_pixel_t *to; + BYTE *to; BYTE *fromold, *fromnew; const int SHIFT = 16; @@ -338,7 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); return true; } else @@ -349,7 +349,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - canvas_pixel_t *to = screen->GetBuffer(); + BYTE *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -393,7 +393,7 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); return true; } return false; @@ -407,8 +407,8 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. 
+ screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 79416c31d..87f61f253 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,7 +655,6 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { -#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -744,7 +743,6 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } -#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index cd34a71b4..f939406bb 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -65,7 +65,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -canvas_pixel_t *dc_destorg; +BYTE* dc_destorg; } int scaledviewwidth; @@ -99,6 +99,7 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); fixed_t (*tmvline1_add)(); @@ -133,7 +134,7 @@ void (*rt_tlateadd4cols)(int sx, int yl, int yh); void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -void (*rt_initcols)(canvas_pixel_t *buffer); +void (*rt_initcols)(BYTE *buffer); // // R_DrawColumn @@ -158,7 +159,7 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -canvas_pixel_t* dc_dest; +BYTE* dc_dest; int dc_count; DWORD vplce[4]; @@ -236,7 +237,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; 
- canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -281,7 +282,7 @@ void R_DrawColumnP_C (void) void R_DrawColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -292,7 +293,7 @@ void R_DrawColumnP_RGBA_C() return; // Framebuffer destination address. - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -328,7 +329,7 @@ void R_DrawColumnP_RGBA_C() void R_FillColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; count = dc_count; @@ -352,14 +353,14 @@ void R_FillColumnP_C (void) void R_FillColumnP_RGBA() { int count; - canvas_pixel_t* dest; + uint32_t* dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -378,7 +379,7 @@ void R_FillColumnP_RGBA() void R_FillAddColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -405,13 +406,13 @@ void R_FillAddColumn_C (void) void R_FillAddColumn_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -436,7 +437,7 @@ void R_FillAddColumn_RGBA_C() void R_FillAddClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -469,13 +470,13 @@ void R_FillAddClampColumn_C (void) void R_FillAddClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -500,7 +501,7 @@ void R_FillAddClampColumn_RGBA() void R_FillSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -532,13 +533,13 @@ void R_FillSubClampColumn_C (void) void 
R_FillSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -563,7 +564,7 @@ void R_FillSubClampColumn_RGBA() void R_FillRevSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -595,13 +596,13 @@ void R_FillRevSubClampColumn_C (void) void R_FillRevSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -673,7 +674,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -745,7 +746,7 @@ void R_DrawFuzzColumnP_C (void) void R_DrawFuzzColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -763,7 +764,7 @@ void R_DrawFuzzColumnP_RGBA_C() count++; - dest = ylookup[dc_yl] + dc_x + dc_destorg; + dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -890,7 +891,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -928,7 +929,7 @@ void R_DrawAddColumnP_C (void) void R_DrawAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -936,7 +937,7 @@ void R_DrawAddColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -980,7 +981,7 @@ void R_DrawAddColumnP_RGBA_C() void R_DrawTranslatedColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -1012,7 +1013,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -1022,7 +1023,7 @@ void R_DrawTranslatedColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1047,7 +1048,7 @@ void R_DrawTranslatedColumnP_RGBA_C() void R_DrawTlatedAddColumnP_C() { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1086,7 +1087,7 @@ void R_DrawTlatedAddColumnP_C() void R_DrawTlatedAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1096,7 +1097,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1135,7 +1136,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() void R_DrawShadedColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac, fracstep; count = dc_count; @@ -1170,7 +1171,7 @@ void R_DrawShadedColumnP_C (void) void R_DrawShadedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -1178,7 +1179,7 @@ void R_DrawShadedColumnP_RGBA_C() if 
(count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1217,7 +1218,7 @@ void R_DrawShadedColumnP_RGBA_C() void R_DrawAddClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1234,24 +1235,20 @@ void R_DrawAddClampColumnP_C () const BYTE *source = dc_source; BYTE *colormap = dc_colormap; int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD b = a; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1261,7 +1258,7 @@ void R_DrawAddClampColumnP_C () void R_DrawAddClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1269,7 +1266,7 @@ void R_DrawAddClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1306,7 +1303,7 @@ void R_DrawAddClampColumnP_RGBA_C() void R_DrawAddClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1347,7 +1344,7 @@ void R_DrawAddClampTranslatedColumnP_C () void R_DrawAddClampTranslatedColumnP_RGBA_C() { int count; - 
canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1355,7 +1352,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1393,7 +1390,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() void R_DrawSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1432,7 +1429,7 @@ void R_DrawSubClampColumnP_C () void R_DrawSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1440,7 +1437,7 @@ void R_DrawSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1477,7 +1474,7 @@ void R_DrawSubClampColumnP_RGBA_C() void R_DrawSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1517,7 +1514,7 @@ void R_DrawSubClampTranslatedColumnP_C () void R_DrawSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1525,7 +1522,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1563,7 +1560,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() void R_DrawRevSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1602,7 +1599,7 @@ void R_DrawRevSubClampColumnP_C () void R_DrawRevSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1610,7 +1607,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1647,7 +1644,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() void 
R_DrawRevSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1687,7 +1684,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1695,7 +1692,7 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1855,7 +1852,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1927,7 +1924,7 @@ void R_DrawSpanP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1945,7 +1942,7 @@ void R_DrawSpanP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2051,7 +2048,7 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2114,7 +2111,7 @@ void R_DrawSpanMaskedP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2125,7 +2122,7 @@ void R_DrawSpanMaskedP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2179,7 +2176,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = 
ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2241,7 +2238,7 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2252,7 +2249,7 @@ void R_DrawSpanTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2323,7 +2320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2399,7 +2396,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2412,7 +2409,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2491,7 +2488,7 @@ void R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2561,7 +2558,7 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2574,7 +2571,7 @@ void R_DrawSpanAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2643,7 +2640,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - 
canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2725,7 +2722,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2738,7 +2735,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2819,7 +2816,7 @@ void R_FillSpan_C (void) void R_FillSpan_RGBA() { - canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); @@ -2843,7 +2840,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) { int x; const BYTE *colormap = slabcolormap; @@ -3017,7 +3014,7 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3039,7 +3036,7 @@ DWORD vlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3058,7 +3055,7 @@ DWORD vlinec1_RGBA() #if !defined(X86_ASM) void vlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; @@ -3076,7 +3073,7 @@ void vlinec4 () void vlinec4_RGBA() { - 
canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; @@ -3162,7 +3159,7 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3188,7 +3185,7 @@ DWORD mvlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3211,7 +3208,7 @@ DWORD mvlinec1_RGBA() #if !defined(X86_ASM) void mvlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3230,7 +3227,7 @@ void mvlinec4 () void mvlinec4_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3260,7 +3257,7 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; for (; y < y2; ++y) { @@ -3278,7 +3275,7 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; do { @@ -3380,7 +3377,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3400,7 +3397,7 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = 
ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3518,7 +3515,7 @@ fixed_t tmvline1_add_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3551,7 +3548,7 @@ fixed_t tmvline1_add_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3589,7 +3586,7 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3622,13 +3619,10 @@ void tmvline4_add_C () void tmvline4_add_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3670,7 +3664,7 @@ fixed_t tmvline1_addclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3708,7 +3702,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3746,7 +3740,7 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3778,7 +3772,7 @@ void tmvline4_addclamp_C () void tmvline4_addclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; 
int bits = tmvlinebits; @@ -3823,7 +3817,7 @@ fixed_t tmvline1_subclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3858,7 +3852,7 @@ fixed_t tmvline1_subclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3893,7 +3887,7 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3924,7 +3918,7 @@ void tmvline4_subclamp_C () void tmvline4_subclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3969,7 +3963,7 @@ fixed_t tmvline1_revsubclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4004,7 +3998,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4039,7 +4033,7 @@ fixed_t tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4070,7 +4064,7 @@ void tmvline4_revsubclamp_C () void tmvline4_revsubclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4168,6 +4162,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapTiltedPlane = 
R_MapColoredPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -4262,6 +4257,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapTiltedPlane = R_MapColoredPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_draw.h b/src/r_draw.h index db109dbee..2348914b6 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -45,7 +45,7 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" canvas_pixel_t *dc_dest, *dc_destorg; +extern "C" BYTE* dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -55,7 +55,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" canvas_pixel_t *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -228,13 +228,13 @@ extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -extern void (*rt_initcols)(canvas_pixel_t *buffer); +extern void (*rt_initcols)(BYTE *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols_pal (canvas_pixel_t *buffer); -void rt_initcols_rgba (canvas_pixel_t *buffer); +void rt_initcols_pal (BYTE *buffer); +void rt_initcols_rgba (BYTE *buffer); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -313,7 +313,7 @@ void R_FillSpan_RGBA_C(void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 9520f59b3..485ed7ab3 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,6 +47,8 @@ #include "r_things.h" #include "v_video.h" +EXTERN_CVAR(Bool, r_swtruecolor) + // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. @@ -57,8 +59,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp; +BYTE dc_tempbuff[MAXHEIGHT*4]; +BYTE *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +75,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. 
void rt_copy1col_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -148,8 +150,8 @@ void rt_copy4cols_c (int sx, int yl, int yh) void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -183,8 +185,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -227,7 +229,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4 + hx]; + BYTE *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. Parallelism is explicitly spelled out by using a separate @@ -274,7 +276,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4]; + BYTE *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +332,8 @@ void rt_tlate4cols_c (int sx, int yl, int yh) void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -364,8 +366,8 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -435,8 +437,8 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int 
pitch; @@ -467,8 +469,8 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) void rt_shaded4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -513,8 +515,8 @@ void rt_shaded4cols_c (int sx, int yl, int yh) void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -550,8 +552,8 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -629,8 +631,8 @@ void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -664,8 +666,8 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -738,8 +740,8 @@ void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -773,8 +775,8 @@ void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -1007,7 +1009,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols_pal (canvas_pixel_t *buff) +void rt_initcols_pal (BYTE *buff) { int y; @@ -1021,7 +1023,7 @@ void rt_initcols_pal (canvas_pixel_t *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t fracstep; fixed_t frac; @@ -1082,7 +1084,7 @@ void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + BYTE *dest; if (count <= 0) return; @@ -1113,6 +1115,7 @@ void R_FillColumnHorizP_C (void) void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { @@ -1182,7 +1185,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; hcolfunc_pre (); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e8111be8f..872cb4b89 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,8 +43,8 @@ #include "r_things.h" #include "v_video.h" -canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp_rgba; +uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +uint32_t *dc_temp_rgba; // Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; @@ -54,8 +54,8 @@ extern unsigned int *horizspan[4]; // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -64,7 +64,7 @@ void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -106,8 +106,8 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -119,7 +119,7 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -143,8 +143,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_map4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -156,7 +156,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -188,7 +188,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + uint32_t *source = &dc_temp_rgba[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -235,7 +235,7 @@ void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + uint32_t *source = &dc_temp_rgba[yl*4]; int c0, c1; BYTE b0, b1; @@ -291,8 +291,8 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -301,7 +301,7 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -333,8 +333,8 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_add4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -343,7 +343,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -392,8 +392,8 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -403,7 +403,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -434,8 +434,8 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) void 
rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -445,7 +445,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -479,8 +479,8 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -489,7 +489,7 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -520,8 +520,8 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -530,7 +530,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -578,8 +578,8 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -588,7 +588,7 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ 
-619,8 +619,8 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -629,7 +629,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -678,8 +678,8 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -690,7 +690,7 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -721,8 +721,8 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -733,7 +733,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -780,11 +780,11 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. -void rt_initcols_rgba (canvas_pixel_t *buff) +void rt_initcols_rgba (BYTE *buff) { int y; - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + dc_temp_rgba = buff == NULL ? dc_temp_rgbabuff_rgba : (uint32_t*)buff; for (y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } @@ -794,7 +794,7 @@ void rt_initcols_rgba (canvas_pixel_t *buff) void R_DrawColumnHorizP_RGBA_C (void) { int count = dc_count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -855,7 +855,7 @@ void R_FillColumnHorizP_RGBA_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + uint32_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index d85cd62a0..9dc61eea3 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -577,9 +577,12 @@ void R_HighlightPortal (PortalDrawseg* pds) // [ZZ] NO OVERFLOW CHECKS HERE // I believe it won't break. if it does, blame me. :( + if (r_swtruecolor) // Assuming this is just a debug function + return; + BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - canvas_pixel_t* pixels = RenderTarget->GetBuffer(); + BYTE* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -624,12 +627,26 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; - - for (int y = Ytop; y <= Ybottom; y++) + if (r_swtruecolor) { - *dest = color; - dest += spacing; + uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; + + uint32_t c = GPalette.BaseColors[color].d; + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = c; + dest += spacing; + } + } + else + { + BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = color; + dest += spacing; + } } } @@ -795,10 +812,11 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static canvas_pixel_t *lastbuff = NULL; + static BYTE *lastbuff = NULL; 
int pitch = RenderTarget->GetPitch(); - canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + int pixelsize = r_swtruecolor ? 4 : 1; + BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8d0c882ba..a71590c9d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -354,12 +354,12 @@ void R_CalcTiltedLighting (double lval, double lend, int width) // //========================================================================== -void R_MapTiltedPlane (int y, int x1) +void R_MapTiltedPlane_C (int y, int x1) { int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - canvas_pixel_t *fb; + BYTE *fb; DWORD u, v; int i; @@ -478,6 +478,130 @@ void R_MapTiltedPlane (int y, int x1) #endif } +void R_MapTiltedPlane_RGBA (int y, int x1) +{ + int x2 = spanend[y]; + int width = x2 - x1; + double iz, uz, vz; + uint32_t *fb; + DWORD u, v; + int i; + + iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0]*width) * planelightfloat; + vz = iz * planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); + vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. 
+ i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + pviewx; + v = SQWORD(vz*z) + pviewy; + ds_colormap = tiltlighting[i]; + ds_light = 0; + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + 
//========================================================================== // // R_MapColoredPlane @@ -491,7 +615,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_RGBA(int y, int x1) { - canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); diff --git a/src/r_plane.h b/src/r_plane.h index ac63501e3..7505ac995 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -94,6 +94,10 @@ void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); extern void(*R_MapColoredPlane)(int y, int x1); +extern void(*R_MapTiltedPlane)(int y, int x1); + +void R_MapTiltedPlane_C(int y, int x1); +void R_MapTiltedPlane_RGBA(int y, int x); void R_MapColoredPlane_C(int y, int x1); void R_MapColoredPlane_RGBA(int y, int x1); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 548cd994f..8c71f0fb7 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1071,7 +1071,7 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1107,6 +1107,8 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l //extern cycle_t WallScanCycles; //clock (WallScanCycles); + int pixelsize = r_swtruecolor ? 
4 : 1; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setupvline(fracbits); @@ -1144,7 +1146,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1202,7 +1204,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } bad >>= 1; } @@ -1213,23 +1215,23 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+x+dc_destorg; + dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; dovline4(); } - canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; + BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1248,7 +1250,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + 
dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1435,7 +1437,7 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1451,7 +1453,8 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1473,6 +1476,9 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 
2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32- rw_pic->HeightBits; setupmvline(fracbits); @@ -1480,7 +1486,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ basecolormapdata = basecolormap->Maps; x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1489,9 +1495,13 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1514,7 +1524,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1539,7 +1549,16 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << 
COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1552,7 +1571,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1563,27 +1582,27 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; domvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1597,7 +1616,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1611,7 +1630,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { 
dc_iscale = vince; dc_colormap = colormap; @@ -1628,7 +1647,8 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1651,6 +1671,9 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setuptmvline(fracbits); @@ -1659,7 +1682,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t centeryfrac = FLOAT2FIXED(CenterY); x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1674,7 +1697,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1687,7 +1710,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1696,7 +1719,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1742,7 +1765,7 @@ void 
transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); tmvline1(); } bad >>= 1; @@ -1754,7 +1777,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); vplce[z] = tmvline1(); } } @@ -1762,21 +1785,21 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; tmvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); tmvline1(); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1789,7 +1812,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); diff --git a/src/r_things.cpp b/src/r_things.cpp index 22538bd40..2abcc0e12 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,6 +98,7 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) 
EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, @@ -132,7 +133,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -canvas_pixel_t *OffscreenColorBuffer; +BYTE *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -244,6 +245,7 @@ bool sprflipvert; void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t centeryfrac = FLOAT2FIXED(CenterY); const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) @@ -314,7 +316,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; colfunc (); } @@ -688,6 +690,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Blend the voxel, if that's what we need to do. if ((flags & ~DVF_MIRRORED) != 0) { + int pixelsize = r_swtruecolor ? 
4 : 1; for (int x = 0; x < viewwidth; ++x) { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) @@ -702,7 +705,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop dc_yl = span->Start; dc_yh = span->Stop - 1; dc_count = span->Stop - span->Start; - dc_dest = ylookup[span->Start] + x + dc_destorg; + dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; colfunc(); } else @@ -2602,7 +2605,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + BYTE *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2668,7 +2671,7 @@ void R_DrawParticle_C (vissprite_t *vis) void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + uint32_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2698,7 +2701,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) dc_x = x; if (R_ClipSpriteColumnWithPortals(vis)) continue; - dest = ylookup[yl] + x + dc_destorg; + dest = ylookup[yl] + x + (uint32_t*)dc_destorg; for (int y = 0; y < ycount; y++) { uint32_t bg_red = (*dest >> 16) & 0xff; @@ -2759,6 +2762,8 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, R_SetupDrawSlab(colormap); + int pixelsize = r_swtruecolor ? 4 : 1; + // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); i = DivScale6(i, MIN(daxscale, dayscale)); @@ -3012,7 +3017,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, ylookup[z1] + lxt + xxl + dc_destorg); + R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); } else { @@ -3243,12 +3248,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if (OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } } OffscreenBufferWidth = width; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 984375f25..ed6571ad3 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -179,7 +179,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - canvas_pixel_t *destorgsave = dc_destorg; + BYTE *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1021,7 +1021,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (r_swtruecolor) { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -1040,7 +1040,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) } else { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + BYTE *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; @@ -1091,27 +1091,62 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - memset (GetBuffer() + y0*GetPitch() + x0, palColor, 
deltaX+1); + if (r_swtruecolor) + { + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + for (int i = 0; i <= deltaX; i++) + spot[i] = palColor; + } + else + { + memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1); + } } else if (deltaX == 0) { // vertical line - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int pitch = GetPitch (); - do + if (r_swtruecolor) { - *spot = palColor; - spot += pitch; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } } else if (deltaX == deltaY) { // diagonal line. - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int advance = GetPitch() + xDir; - do + if (r_swtruecolor) { - *spot = palColor; - spot += advance; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } } else { @@ -1231,7 +1266,6 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1261,12 +1295,26 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - dest = Buffer + top * Pitch + left; - x = right - left; - for (y = top; y < bottom; y++) + if (r_swtruecolor) { - memset(dest, palcolor, x); - dest += Pitch; + uint32_t *dest = (uint32_t*)Buffer + top * Pitch + 
left; + x = right - left; + for (y = top; y < bottom; y++) + { + for (int i = 0; i < x; i++) + dest[i] = palcolor; + dest += Pitch; + } + } + else + { + BYTE *dest = Buffer + top * Pitch + left; + x = right - left; + for (y = top; y < bottom; y++) + { + memset(dest, palcolor, x); + dest += Pitch; + } } } @@ -1452,11 +1500,14 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { + if (r_swtruecolor) + return; + int srcpitch = _width; int destpitch; - canvas_pixel_t *dest; + BYTE *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1468,7 +1519,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix do { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += srcpitch; dest += destpitch; } while (--_height); @@ -1478,9 +1529,12 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix // V_GetBlock // Gets a linear block of pixels from the view buffer. // -void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - const canvas_pixel_t *src; + if (r_swtruecolor) + return; + + const BYTE *src; #ifdef RANGECHECK if (x<0 @@ -1496,14 +1550,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *d while (_height--) { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. 
-bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 2fb46e88a..01043b8bc 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -345,7 +345,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; int gap; - canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -365,11 +364,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } - spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; if (r_swtruecolor) { + uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch; + uint32_t fg = color.d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -402,6 +402,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } else { + BYTE *spot = Buffer + x1 + y1*Pitch; + DWORD *bg2rgb; DWORD fg; @@ -441,12 +443,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = SS_PAL; + color_type = r_swtruecolor ? 
SS_BGRA : SS_PAL; } //========================================================================== @@ -797,8 +799,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new canvas_pixel_t[Pitch * height]; - memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); + MemBuffer = new BYTE[Pitch * height * 4]; + memset (MemBuffer, 0, Pitch * height * 4); } //========================================================================== @@ -917,7 +919,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - canvas_pixel_t *buffer = GetBuffer(); + BYTE *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; @@ -925,10 +927,21 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - buffer += (GetHeight()-1) * GetPitch(); - - for (i = 0; i < tics*2; i += 2) buffer[i] = 0xff; - for ( ; i < 20*2; i += 2) buffer[i] = 0x00; + if (r_swtruecolor) + { + uint32_t *buffer32 = (uint32_t*)buffer; + buffer32 += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer32[i] = 0xffffffff; + for (; i < 20 * 2; i += 2) buffer32[i] = 0xff000000; + } + else + { + buffer += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer[i] = 0xff; + for (; i < 20 * 2; i += 2) buffer[i] = 0x00; + } } else { diff --git a/src/v_video.h b/src/v_video.h index 27c09ee36..fa1ce83df 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline canvas_pixel_t *GetBuffer () const { return Buffer; } + inline BYTE *GetBuffer () const { return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a 
linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; + virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - canvas_pixel_t *Buffer; + BYTE *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - canvas_pixel_t *MemBuffer; + BYTE *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cc9045ee..0cd847b97 100644 --- 
a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1316,7 +1316,7 @@ void D3DFB::Draw3DPart(bool copy3d) else { uint32_t *dest = (uint32_t *)lockrect.pBits; - uint32_t *src = MemBuffer; + uint32_t *src = (uint32_t*)MemBuffer; for (int y = 0; y < Height; y++) { memcpy(dest, src, Width * sizeof(uint32_t)); @@ -1744,7 +1744,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2) // //========================================================================== -void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1770,7 +1770,7 @@ void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSTy } else { - buffer = (const canvas_pixel_t *)lrect.pBits; + buffer = (const BYTE *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 9be571f98..fbdf035a3 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -999,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (canvas_pixel_t *)desc.lpSurface; - Pitch = desc.lPitch / sizeof(canvas_pixel_t); + Buffer = (BYTE *)desc.lpSurface; + Pitch = desc.lPitch; BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index d26765100..0b3333d63 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -252,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); From 05220a713320b0b14525baad647f8ace577e19ee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 09:36:18 +0200 Subject: [PATCH 06/94] Added IsBgra() to DCanvas Changed SWRender output format to be decided by IsBgra() --- src/f_wipe.cpp | 10 ++++----- src/posix/cocoa/i_video.mm | 10 +++++---- src/posix/hardware.h | 2 +- src/posix/sdl/hardware.cpp | 5 ++++- src/posix/sdl/sdlvideo.cpp | 6 +++--- src/posix/sdl/sdlvideo.h | 2 +- src/r_draw.cpp | 39 +++++++++++++++++++++++++++++++++- src/r_drawt.cpp | 2 -- src/r_main.cpp | 28 ++++++++++++++---------- src/r_main.h | 2 ++ src/r_segs.cpp | 2 -- src/r_swrenderer.cpp | 12 ++++++----- src/r_things.cpp | 1 - src/textures/canvastexture.cpp | 2 +- src/v_draw.cpp | 16 ++++++-------- src/v_video.cpp | 26 +++++++++++------------ src/v_video.h | 8 ++++--- src/win32/fb_d3d9.cpp | 17 +++++++-------- src/win32/fb_ddraw.cpp | 7 +----- src/win32/hardware.cpp | 5 ++++- src/win32/hardware.h | 2 +- src/win32/win32iface.h | 8 +++---- src/win32/win32video.cpp | 21 ++++++++++-------- wadsrc/static/menudef.txt | 2 +- 24 files changed, 139 insertions(+), 96 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 7e1ec678e..84b6036e4 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,8 +33,6 @@ // SCREEN WIPE PACKAGE // -EXTERN_CVAR(Bool, r_swtruecolor) - static int CurrentWipeType; static short *wipe_scr_start; @@ -385,7 +383,7 @@ static bool (*wipes[])(int) 
= // Returns true if the wipe should be performed. bool wipe_StartScreen (int type) { - if (r_swtruecolor) + if (screen->IsBgra()) return false; CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); @@ -401,7 +399,7 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (CurrentWipeType) @@ -420,7 +418,7 @@ bool wipe_ScreenWipe (int ticks) { bool rc; - if (r_swtruecolor) + if (screen->IsBgra()) return true; if (CurrentWipeType == wipe_None) @@ -436,7 +434,7 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (wipe_scr_start != NULL) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5e073daf3..c97460a02 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,6 +96,8 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, vid_hidpi) +CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) + CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { extern int NewWidth, NewHeight, NewBits, DisplayBits; @@ -199,7 +201,7 @@ public: virtual EDisplayType GetDisplayType() { return DISPLAY_Both; } virtual void SetWindowedScale(float scale); - virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer* old); + virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer* old); virtual void StartModeIterator(int bits, bool fullscreen); virtual bool NextMode(int* width, int* height, bool* letterbox); @@ -518,7 +520,7 @@ bool CocoaVideo::NextMode(int* const width, int* const height, bool* const lette return false; } -DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool fullscreen, DFrameBuffer* const old) +DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool bgra, const bool fullscreen, DFrameBuffer* const old) { 
PalEntry flashColor = 0; int flashAmount = 0; @@ -762,7 +764,7 @@ CocoaVideo* CocoaVideo::GetInstance() CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height) +: DFrameBuffer(width, height, false) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -1064,7 +1066,7 @@ void I_CreateRenderer() DFrameBuffer* I_SetMode(int &width, int &height, DFrameBuffer* old) { - return Video->CreateFrameBuffer(width, height, fullscreen, old); + return Video->CreateFrameBuffer(width, height, swtruecolor, fullscreen, old); } bool I_CheckResolution(const int width, const int height, const int bits) diff --git a/src/posix/hardware.h b/src/posix/hardware.h index 618941fe5..3c06cb6c6 100644 --- a/src/posix/hardware.h +++ b/src/posix/hardware.h @@ -74,7 +74,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 6142eb1d8..52bca35e7 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) IVideo *Video; @@ -128,7 +129,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) fs = fullscreen; break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -280,6 +281,8 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE 
| CVAR_GLOBALCONFIG) } } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 04c3a3f2e..b050097be 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -257,7 +257,7 @@ bool SDLVideo::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -335,7 +335,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; @@ -351,7 +351,7 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height) + : DFrameBuffer (width, height, false) { int i; diff --git a/src/posix/sdl/sdlvideo.h b/src/posix/sdl/sdlvideo.h index 072167b5a..385733bc1 100644 --- a/src/posix/sdl/sdlvideo.h +++ b/src/posix/sdl/sdlvideo.h @@ -10,7 +10,7 @@ class SDLVideo : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f939406bb..82169ec6f 100644 --- 
a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -179,7 +179,6 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) -EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4129,6 +4128,14 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { + // Save a copy when switching to true color mode as the assembly palette drawers might change them + static bool pointers_saved = false; + static DWORD(*dovline1_saved)(); + static DWORD(*doprevline1_saved)(); + static DWORD(*domvline1_saved)(); + static void(*dovline4_saved)(); + static void(*domvline4_saved)(); + if (r_swtruecolor) { R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; @@ -4201,6 +4208,16 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; dovline4 = vlinec4_RGBA; @@ -4304,7 +4321,27 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + + if (pointers_saved) + { + pointers_saved = false; + dovline1 = dovline1_saved; + doprevline1 = doprevline1_saved; + domvline1 = domvline1_saved; + dovline4 = dovline4_saved; + domvline4 = domvline4_saved; + } } + + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + + // [RH] Horizontal column drawers + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; } // [RH] Choose column drawers in a single place diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 485ed7ab3..e47590c72 100644 --- 
a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,8 +47,6 @@ #include "r_things.h" #include "v_video.h" -EXTERN_CVAR(Bool, r_swtruecolor) - // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. diff --git a/src/r_main.cpp b/src/r_main.cpp index 9dc61eea3..aec8310d5 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,7 +103,8 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) + +bool r_swtruecolor; double r_BaseVisibility; double r_WallVisibility; @@ -398,16 +399,6 @@ void R_InitRenderer() R_InitPlanes (); R_InitShadeMaps(); R_InitColumnDrawers (); - - colfunc = basecolfunc = R_DrawColumn; - fuzzcolfunc = R_DrawFuzzColumn; - transcolfunc = R_DrawTranslatedColumn; - spanfunc = R_DrawSpan; - - // [RH] Horizontal column drawers - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } //========================================================================== @@ -962,6 +953,13 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; + const bool savedoutputformat = r_swtruecolor; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } viewwidth = width; RenderTarget = canvas; @@ -980,7 +978,15 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, screen->Lock (true); R_SetupBuffer (); screen->Unlock (); + viewactive = savedviewactive; + r_swtruecolor = savedoutputformat; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } } //========================================================================== diff --git a/src/r_main.h b/src/r_main.h index c1034ea3e..765635e5d 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -106,6 +106,8 @@ inline 
uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +extern bool r_swtruecolor; + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 8c71f0fb7..cab97adfc 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,8 +61,6 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) -EXTERN_CVAR(Bool, r_swtruecolor) - #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 433007acb..15e2fda8f 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -155,6 +155,12 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + if (r_swtruecolor != screen->IsBgra()) + { + r_swtruecolor = screen->IsBgra(); + R_InitColumnDrawers(); + } + R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); @@ -182,8 +188,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { -#ifdef PALETTEOUTPUT - DCanvas *pic = new DSimpleCanvas (width, height); + DCanvas *pic = new DSimpleCanvas (width, height, false); PalEntry palette[256]; // Take a snapshot of the player's view @@ -196,7 +201,6 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; -#endif } //=========================================================================== @@ -313,7 +317,6 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { -#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -337,7 +340,6 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; -#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 2abcc0e12..f52c80376 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,7 +98,6 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); -EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index d1f70439f..7242149a4 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -103,7 +103,7 @@ const BYTE *FCanvasTexture::GetPixels () void FCanvasTexture::MakeTexture () { - Canvas = new DSimpleCanvas (Width, Height); + Canvas = new DSimpleCanvas (Width, Height, false); Canvas->Lock (); GC::AddSoftRoot(Canvas); diff --git a/src/v_draw.cpp 
b/src/v_draw.cpp index ed6571ad3..b4f1ad4b5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,8 +77,6 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); -EXTERN_CVAR(Bool, r_swtruecolor) - // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. static int LastPal = -1; @@ -1019,7 +1017,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; @@ -1091,7 +1089,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) @@ -1104,7 +1102,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); @@ -1127,7 +1125,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. 
- if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; @@ -1295,7 +1293,7 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; @@ -1502,7 +1500,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { - if (r_swtruecolor) + if (IsBgra()) return; int srcpitch = _width; @@ -1531,7 +1529,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - if (r_swtruecolor) + if (IsBgra()) return; const BYTE *src; diff --git a/src/v_video.cpp b/src/v_video.cpp index 01043b8bc..bc99edbf1 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,8 +65,6 @@ #include "menu/menu.h" #include "r_data/voxels.h" -EXTERN_CVAR(Bool, r_swtruecolor) - FRenderer *Renderer; IMPLEMENT_ABSTRACT_CLASS (DCanvas) @@ -83,7 +81,7 @@ class DDummyFrameBuffer : public DFrameBuffer DECLARE_CLASS (DDummyFrameBuffer, DFrameBuffer); public: DDummyFrameBuffer (int width, int height) - : DFrameBuffer (0, 0) + : DFrameBuffer (0, 0, false) { Width = width; Height = height; @@ -208,13 +206,14 @@ DCanvas *DCanvas::CanvasChain = NULL; // //========================================================================== -DCanvas::DCanvas (int _width, int _height) +DCanvas::DCanvas (int _width, int _height, bool _bgra) { // Init member vars Buffer = NULL; LockCount = 0; Width = _width; Height = _height; + Bgra = _bgra; // Add to list of active canvases Next = CanvasChain; @@ -366,7 +365,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) gap = Pitch - w; - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = 
(uint32_t*)Buffer + x1 + y1*Pitch; @@ -448,7 +447,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = r_swtruecolor ? SS_BGRA : SS_PAL; + color_type = IsBgra() ? SS_BGRA : SS_PAL; } //========================================================================== @@ -761,8 +760,8 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // //========================================================================== -DSimpleCanvas::DSimpleCanvas (int width, int height) - : DCanvas (width, height) +DSimpleCanvas::DSimpleCanvas (int width, int height, bool bgra) + : DCanvas (width, height, bgra) { // Making the pitch a power of 2 is very bad for performance // Try to maximize the number of cache lines that can be filled @@ -799,8 +798,9 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height * 4]; - memset (MemBuffer, 0, Pitch * height * 4); + int bytes_per_pixel = bgra ? 
4 : 1; + MemBuffer = new BYTE[Pitch * height * bytes_per_pixel]; + memset (MemBuffer, 0, Pitch * height * bytes_per_pixel); } //========================================================================== @@ -869,8 +869,8 @@ void DSimpleCanvas::Unlock () // //========================================================================== -DFrameBuffer::DFrameBuffer (int width, int height) - : DSimpleCanvas (width, height) +DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) + : DSimpleCanvas (width, height, bgra) { LastMS = LastSec = FrameCount = LastCount = LastTic = 0; Accel2D = false; @@ -927,7 +927,7 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - if (r_swtruecolor) + if (IsBgra()) { uint32_t *buffer32 = (uint32_t*)buffer; buffer32 += (GetHeight() - 1) * GetPitch(); diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df..120beff9a 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -185,7 +185,7 @@ class DCanvas : public DObject { DECLARE_ABSTRACT_CLASS (DCanvas, DObject) public: - DCanvas (int width, int height); + DCanvas (int width, int height, bool bgra); virtual ~DCanvas (); // Member variable access @@ -193,6 +193,7 @@ public: inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } + inline bool IsBgra() const { return Bgra; } virtual bool IsValid (); @@ -267,6 +268,7 @@ protected: int Height; int Pitch; int LockCount; + bool Bgra; bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; @@ -289,7 +291,7 @@ class DSimpleCanvas : public DCanvas { DECLARE_CLASS (DSimpleCanvas, DCanvas) public: - DSimpleCanvas (int width, int height); + DSimpleCanvas (int width, int height, bool bgra); ~DSimpleCanvas (); bool IsValid (); @@ -327,7 +329,7 @@ class DFrameBuffer 
: public DSimpleCanvas { DECLARE_ABSTRACT_CLASS (DFrameBuffer, DSimpleCanvas) public: - DFrameBuffer (int width, int height); + DFrameBuffer (int width, int height, bool bgra); // Force the surface to use buffered output if true is passed. virtual bool Lock (bool buffered) = 0; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cd847b97..fd84e3bbb 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,7 +187,6 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) -EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -243,8 +242,8 @@ CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) // //========================================================================== -D3DFB::D3DFB (UINT adapter, int width, int height, bool fullscreen) - : BaseWinFB (width, height) +D3DFB::D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen) + : BaseWinFB (width, height, bgra) { D3DPRESENT_PARAMETERS d3dpp; @@ -766,7 +765,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; + FBFormat = IsBgra() ? 
D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1307,7 +1306,7 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) + if (IsBgra() && FBFormat == D3DFMT_A8R8G8B8) { if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) { @@ -1325,7 +1324,7 @@ void D3DFB::Draw3DPart(bool copy3d) } } } - else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + else if (!IsBgra() && FBFormat == D3DFMT_L8) { if (lockrect.Pitch == Pitch && Pitch == Width) { @@ -1377,7 +1376,7 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); @@ -1398,7 +1397,7 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_SpecialColormap]); else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); @@ -1412,7 +1411,7 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index fbdf035a3..5637e9695 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -60,9 +60,7 @@ // TYPES 
------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) -#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -120,10 +118,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW - DDrawFB::DDrawFB (int width, int height, bool fullscreen) - : BaseWinFB (width, height) + : BaseWinFB (width, height, false) { int i; @@ -1330,7 +1326,6 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } -#endif ADD_STAT (blit) { diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8cc770556..8856924c0 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) @@ -146,7 +147,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) } break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -310,6 +311,8 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) diff --git a/src/win32/hardware.h b/src/win32/hardware.h index b2bafef32..184eeccf5 100644 --- a/src/win32/hardware.h +++ b/src/win32/hardware.h @@ -45,7 +45,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer 
*CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 0b3333d63..d30475eb3 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -70,7 +70,7 @@ class Win32Video : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); @@ -121,7 +121,7 @@ class BaseWinFB : public DFrameBuffer { DECLARE_ABSTRACT_CLASS(BaseWinFB, DFrameBuffer) public: - BaseWinFB (int width, int height) : DFrameBuffer (width, height), Windowed (true) {} + BaseWinFB (int width, int height, bool bgra) : DFrameBuffer (width, height, bgra), Windowed (true) {} bool IsFullscreen () { return !Windowed; } virtual void Blank () = 0; @@ -142,7 +142,6 @@ protected: BaseWinFB() {} }; -#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -224,13 +223,12 @@ private: DDrawFB() {} }; -#endif class D3DFB : public BaseWinFB { DECLARE_CLASS(D3DFB, BaseWinFB) public: - D3DFB (UINT adapter, int width, int height, bool fullscreen); + D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen); ~D3DFB (); bool IsValid (); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 3f3645d0b..5b2d5ef20 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -222,6 +222,13 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. 
FreeModes (); #ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + + AddMode(320, 200, 8, 200, 0); + AddMode(320, 240, 8, 240, 0); + AddMode(640, 480, 8, 480, 0); + AddMode(800, 600, 8, 600, 0); + AddMode(1024, 768, 8, 768, 0); + AddMode(1920, 1080, 8, 1440, 0); // 1080p AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic @@ -636,7 +643,7 @@ bool Win32Video::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -652,7 +659,8 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr BaseWinFB *fb = static_cast (old); if (fb->Width == width && fb->Height == height && - fb->Windowed == !fullscreen) + fb->Windowed == !fullscreen && + fb->Bgra == bgra) { return old; } @@ -667,13 +675,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } -#ifndef USE_OBSOLETE_DDRAW - fb = new D3DFB(m_Adapter, width, height, fullscreen); - LOG1("New fb created @ %p\n", fb); -#else if (D3D != NULL) { - fb = new D3DFB (m_Adapter, width, height, fullscreen); + fb = new D3DFB (m_Adapter, width, height, bgra, fullscreen); } else { @@ -738,10 +742,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; -#endif fb->SetFlash (flashColor, flashAmount); return fb; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 93e33ce79..3c712de96 100644 --- a/wadsrc/static/menudef.txt +++ 
b/wadsrc/static/menudef.txt @@ -661,7 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" - Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" + Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From e929eec80f688f2afbd4a27ade847282aad9622d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 05:28:14 +0200 Subject: [PATCH 07/94] Make x86 asm aware of swtruecolor --- src/doomtype.h | 5 + src/r_draw.cpp | 243 +++++++++++++++++++++++++++++++++------ src/r_draw.h | 1 + src/r_drawt.cpp | 25 ++-- src/r_plane.cpp | 41 ++++--- src/win32/win32video.cpp | 4 +- 6 files changed, 256 insertions(+), 63 deletions(-) diff --git a/src/doomtype.h b/src/doomtype.h index 39c59751d..9fca870d3 100644 --- a/src/doomtype.h +++ b/src/doomtype.h @@ -99,6 +99,11 @@ typedef TMap FClassMap; #endif +// Only use SSE intrinsics on Intel architecture +#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__) +#define NO_SSE +#endif + #if defined(_MSC_VER) #define NOVTABLE __declspec(novtable) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 82169ec6f..d7b740973 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1789,7 +1789,7 @@ void R_SetSpanSource(const BYTE *pixels) { ds_source = pixels; #ifdef X86_ASM - if (ds_cursource != ds_source) + if (!r_swtruecolor && ds_cursource != ds_source) { R_SetSpanSource_ASM(pixels); } @@ -1809,7 +1809,7 @@ void R_SetSpanColormap(BYTE *colormap) ds_colormap = colormap; ds_light = 0; #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) { R_SetSpanColormap_ASM (ds_colormap); } @@ -1838,7 +1838,8 @@ void R_SetupSpanBits(FTexture *tex) ds_ybits--; } #ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); + if (!r_swtruecolor) + R_SetSpanSize_ASM 
(ds_xbits, ds_ybits); #endif } @@ -1954,7 +1955,80 @@ void R_DrawSpanP_RGBA_C() { // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + #ifndef NO_SSE +void R_DrawSpanP_RGBA_SSE() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -2000,7 +2074,6 @@ void R_DrawSpanP_RGBA_C() } if (count == 0) return; -#endif do { @@ -2037,6 +2110,7 @@ void R_DrawSpanP_RGBA_C() } while (--count); } } +#endif #ifndef X86_ASM @@ -2971,6 +3045,12 @@ void (*domvline4)() = mvlineasm4; void setupvline (int fracbits) { + if (r_swtruecolor) + { + vlinebits = fracbits; + return; + } + #ifdef X86_ASM if (CPU.Family <= 5) { @@ -3075,23 +3155,43 @@ void vlinec4_RGBA() uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; + DWORD place; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + #ifndef NO_SSE +void vlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, 
light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif do { -#ifndef NO_SSE - DWORD place0 = local_vplce[0]; DWORD place1 = local_vplce[1]; DWORD place2 = local_vplce[2]; @@ -3116,17 +3216,9 @@ void vlinec4_RGBA() fg_lo = _mm_srli_epi16(fg_lo, 8); fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); - -#else - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif dest += dc_pitch; } while (--count); -#ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. 
vplce[0] = local_vplce[0]; vplce[1] = local_vplce[1]; @@ -3136,18 +3228,25 @@ void vlinec4_RGBA() vince[1] = local_vince[1]; vince[2] = local_vince[2]; vince[3] = local_vince[3]; -#endif } +#endif void setupmvline (int fracbits) { + if (!r_swtruecolor) + { #if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; + setupmvlineasm(fracbits); + domvline1 = mvlineasm1; + domvline4 = mvlineasm4; #else - mvlinebits = fracbits; + mvlinebits = fracbits; #endif + } + else + { + mvlinebits = fracbits; + } } #if !defined(X86_ASM) @@ -3247,6 +3346,73 @@ void mvlinec4_RGBA() } while (--count); } +#ifndef NO_SSE +void mvlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + BYTE p0 = palookupoffse[0][pix0]; + BYTE p1 = palookupoffse[1][pix1]; + BYTE p2 = 
palookupoffse[2][pix2]; + BYTE p3 = palookupoffse[3][pix3]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +} +#endif + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -4138,14 +4304,28 @@ void R_InitColumnDrawers () if (r_swtruecolor) { + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; R_DrawColumn = R_DrawColumnP_RGBA_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; rt_map4cols = rt_map4cols_RGBA_c; +#ifndef NO_SSE + R_DrawSpan = R_DrawSpanP_RGBA_SSE; +#else + R_DrawSpan = R_DrawSpanP_RGBA_C; +#endif R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4208,21 +4388,18 @@ void 
R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - if (!pointers_saved) - { - pointers_saved = true; - dovline1_saved = dovline1; - doprevline1_saved = doprevline1; - domvline1_saved = domvline1; - dovline4_saved = dovline4; - domvline4_saved = domvline4; - } - dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; + +#ifndef NO_SSE + dovline4 = vlinec4_RGBA_SSE; + domvline4 = mvlinec4_RGBA_SSE; +#else + dovline4 = vlinec4_RGBA; + domvline4 = mvlinec4_RGBA; +#endif + } else { diff --git a/src/r_draw.h b/src/r_draw.h index 2348914b6..d5007c885 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -269,6 +269,7 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e47590c72..ca6862ed6 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -860,18 +860,21 @@ void rt_draw4cols (int sx) } #ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) + if (!r_swtruecolor) { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); + // Setup assembly routines for changed colormaps or other parameters. 
+ if (hcolfunc_post4 == rt_shaded4cols) + { + R_SetupShadedCol(); + } + else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) + { + R_SetupAddClampCol(); + } + else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) + { + R_SetupAddCol(); + } } #endif diff --git a/src/r_plane.cpp b/src/r_plane.cpp index a71590c9d..40e14c020 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -231,7 +231,7 @@ void R_MapPlane (int y, int x1) } #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) R_SetSpanColormap_ASM (ds_colormap); #endif @@ -1620,7 +1620,7 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { #ifdef X86_ASM - if (ds_source != ds_cursource) + if (!r_swtruecolor && ds_source != ds_cursource) { R_SetSpanSource_ASM (ds_source); } @@ -1747,7 +1747,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1782,7 +1782,7 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI*3/2 - ViewAngle.Radians(); + ang = M_PI * 3 / 2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1793,25 +1793,25 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; -// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); -// VectorScale2 (m, 64.f/VectorLength(m)); + // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); + // VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI/2 - n[0] = -xscale * cos(ang); + ang += M_PI / 2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif -// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); -// VectorScale2 (n, 64.f/VectorLength(n)); + // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); + // VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1861,9 +1861,16 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } #if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); + } + else + { + R_MapVisPlane(pl, R_MapTiltedPlane); + } #else R_MapVisPlane (pl, R_MapTiltedPlane); #endif diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 5b2d5ef20..a180a35ea 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -229,8 +229,8 @@ bool Win32Video::InitD3D9 () AddMode(800, 600, 8, 600, 0); AddMode(1024, 768, 8, 768, 0); - AddMode(1920, 1080, 8, 1440, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(1920, 1080, 8, 1080, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k #else From b9d7a98aeceac8987917db03c9eecce50b1b4abd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 06:02:37 +0200 Subject: [PATCH 08/94] Change swtruecolor cvar to take effect immediately --- src/posix/cocoa/i_video.mm | 11 ++++++++++- src/posix/sdl/hardware.cpp | 12 ++++++++++-- src/win32/hardware.cpp | 12 ++++++++++-- src/win32/win32video.cpp | 14 -------------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c97460a02..c2eb58c6d 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,7 +96,16 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, 
vid_hidpi) -CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + extern int NewWidth, NewHeight, NewBits, DisplayBits; + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 52bca35e7..9de4d03a4 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -281,10 +281,18 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) } } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) { NewWidth = screen->GetWidth(); diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8856924c0..49c970457 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -311,10 +311,18 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. 
+ NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { NewWidth = screen->GetWidth(); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index a180a35ea..74b10ef07 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,22 +221,8 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); -#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) - - AddMode(320, 200, 8, 200, 0); - AddMode(320, 240, 8, 240, 0); - AddMode(640, 480, 8, 480, 0); - AddMode(800, 600, 8, 600, 0); - AddMode(1024, 768, 8, 768, 0); - - AddMode(1920, 1080, 8, 1080, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k - AddMode(2560, 1440, 8, 1440, 0); // 27" classic - AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k -#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); -#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); From 4f635983fcf791b52535b9c597bdc4e823fef635 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 08:54:39 +0200 Subject: [PATCH 09/94] Add bgra support to OS X target --- src/posix/cocoa/i_video.mm | 30 +++++++++++++++++++++--------- src/r_main.cpp | 1 - src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 6 ++++++ 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c2eb58c6d..425fe5887 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -249,7 +249,7 @@ private: class CocoaFrameBuffer : public DFrameBuffer { public: - CocoaFrameBuffer(int width, int height, bool fullscreen); + CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen); ~CocoaFrameBuffer(); virtual bool Lock(bool buffer); @@ -536,7 +536,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c if (NULL != old) { - if (width == m_width && height == m_height) + if (width == m_width && height == m_height && bgra == old->IsBgra()) { SetMode(width, height, fullscreen, vid_hidpi); return old; @@ -553,7 +553,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c delete old; } - CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, fullscreen); + CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, bgra, fullscreen); fb->SetFlash(flashColor, flashAmount); SetMode(width, height, fullscreen, vid_hidpi); @@ -772,8 +772,8 @@ CocoaVideo* CocoaVideo::GetInstance() } -CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height, false) +CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen) +: DFrameBuffer(width, height, bgra) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -867,8 +867,18 @@ void CocoaFrameBuffer::Update() FlipCycles.Reset(); BlitCycles.Clock(); - GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width 
* BYTES_PER_PIXEL, - Width, Height, FRACUNIT, FRACUNIT, 0, 0); + if (IsBgra()) + { + for (int y = 0; y < Height; y++) + { + memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); + } + } + else + { + GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL, + Width, Height, FRACUNIT, FRACUNIT, 0, 0); + } FlipCycles.Clock(); Flip(); @@ -1000,8 +1010,10 @@ void CocoaFrameBuffer::Flip() static const GLenum format = GL_ABGR_EXT; #endif // __LITTLE_ENDIAN__ - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, - Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); + if (IsBgra()) + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer); + else + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); glBegin(GL_QUADS); glColor4f(1.0f, 1.0f, 1.0f, 1.0f); diff --git a/src/r_main.cpp b/src/r_main.cpp index aec8310d5..aaf8fc532 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -984,7 +984,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, if (r_swtruecolor != canvas->IsBgra()) { - r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 15e2fda8f..ee6ac5fed 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -58,6 +58,7 @@ void R_InitRenderer(); void FSoftwareRenderer::Init() { + r_swtruecolor = screen->IsBgra(); R_InitRenderer(); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index b4f1ad4b5..57fac3cda 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -129,6 +129,12 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; + if (r_swtruecolor != IsBgra()) + { + r_swtruecolor = IsBgra(); + R_InitColumnDrawers(); + } + if (parms.masked) { spanptr = &spans; From 
47f32d03cd2d0b8966361b49acb5d0b6de40b94b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 14:49:03 +0200 Subject: [PATCH 10/94] Fixed some light and blending functions for the true color mode --- src/r_draw.cpp | 283 +++++++++++++++++++++++++------------------ src/r_draw.h | 2 + src/r_drawt_rgba.cpp | 76 +++++++----- src/r_main.h | 13 +- src/r_plane.cpp | 48 ++++++-- src/r_segs.cpp | 41 +++---- src/r_things.cpp | 13 +- src/v_draw.cpp | 6 +- 8 files changed, 284 insertions(+), 198 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d7b740973..5a314e640 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -155,6 +155,8 @@ int dc_color; // [RH] Color for column filler DWORD dc_srccolor; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups +fixed_t dc_srcalpha; // Alpha value used by dc_srcblend +fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; @@ -414,9 +416,10 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -478,9 +481,10 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -541,9 +545,10 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = 
(dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -604,9 +609,10 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -946,6 +952,9 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); @@ -958,9 +967,9 @@ void R_DrawAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1107,6 +1116,9 @@ void R_DrawTlatedAddColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1119,9 +1131,9 @@ void 
R_DrawTlatedAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1276,6 +1288,9 @@ void R_DrawAddClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1287,9 +1302,9 @@ void R_DrawAddClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1363,6 +1378,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1374,9 +1392,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - 
uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1447,6 +1465,9 @@ void R_DrawSubClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1458,9 +1479,9 @@ void R_DrawSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1533,6 +1554,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1544,9 +1568,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = 
(*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1616,6 +1640,8 @@ void R_DrawRevSubClampColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { @@ -1628,9 +1654,9 @@ void R_DrawRevSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1703,6 +1729,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1714,9 
+1743,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1806,8 +1835,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { - ds_colormap = colormap; - ds_light = 0; + R_SetDSColorMapLight(colormap, 0, 0); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2316,8 +2344,6 @@ void R_DrawSpanTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2331,6 +2357,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -2347,9 +2376,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2375,9 +2404,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2474,11 +2503,12 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2509,9 +2539,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ 
-2542,9 +2572,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2636,11 +2666,12 @@ void R_DrawSpanAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2667,9 +2698,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2695,9 +2726,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha 
+ bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2800,11 +2831,12 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2835,9 +2867,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2868,9 +2900,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3468,8 +3500,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3494,8 +3525,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = 
lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3594,8 +3624,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3620,8 +3649,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3717,11 +3745,11 @@ fixed_t tmvline1_add_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3736,9 +3764,9 @@ fixed_t tmvline1_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3794,6 +3822,9 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; ++i) @@ -3810,9 +3841,9 @@ void tmvline4_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3871,11 +3902,11 @@ fixed_t tmvline1_addclamp_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3890,9 +3921,9 @@ fixed_t tmvline1_addclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3947,6 +3978,9 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -3963,9 +3997,9 @@ void tmvline4_addclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 
0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4023,6 +4057,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4037,9 +4074,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4093,6 +4130,9 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4109,9 +4149,9 @@ void tmvline4_subclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 
256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4169,6 +4209,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4183,9 +4226,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4233,15 +4276,15 @@ void tmvline4_revsubclamp_RGBA() int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4258,9 
+4301,9 @@ void tmvline4_revsubclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4558,16 +4601,22 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { dc_srcblend = Col2RGB8[fglevel>>10]; dc_destblend = Col2RGB8[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else { dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } switch (op) { @@ -4736,12 +4785,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - dc_colormap += fixedlightlev; + R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + } + else + { + R_SetColorMapLight(colormap, 0, 0); } - dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -4766,8 +4818,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index d5007c885..f60b2299e 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -41,6 +41,8 @@ extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; +extern "C" fixed_t dc_srcalpha; +extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 872cb4b89..1725b80e4 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -308,6 +308,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -318,9 +321,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -350,6 +353,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t 
light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -362,9 +368,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -496,6 +502,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -506,9 +515,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -537,6 +546,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; i++) { @@ -549,9 +561,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -595,6 +607,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -605,9 +620,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -636,6 +651,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 
0; i < 4; i++) { @@ -648,9 +666,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -688,8 +706,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -697,6 +713,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -707,9 +726,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + 
fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -731,8 +750,6 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -740,6 +757,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -752,9 +772,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } diff --git a/src/r_main.h b/src/r_main.h index 765635e5d..e8be3c1a3 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,15 +82,18 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) -// Calculate the light multiplier for ds_light -// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap -#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) +// Calculate the light multiplier for dc_light/ds_light +// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap +// Returns a value between 0 and 1 in fixed point +#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) + +// Converts fixedlightlev into a shade value +#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { - // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? 
- return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); + return 256 - (light >> (FRACBITS - 8)); } // Calculates a ARGB8 color for the given palette index and light multiplier diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 40e14c020..9805ab200 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -391,8 +391,7 @@ void R_MapTiltedPlane_C (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -515,8 +514,7 @@ void R_MapTiltedPlane_RGBA (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -1595,14 +1593,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); } else { fakefixed = true; - fixedcolormap = dc_colormap = NormalLight.Maps; - dc_light = 0; + fixedcolormap = NormalLight.Maps; + R_SetColorMapLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1685,11 +1682,19 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t GlobVis = r_FloorVisibility / planeheight; ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else + { plane_shade = true; + } if (spanfunc != R_FillSpan) { @@ -1702,12 +1707,16 @@ void 
R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanMaskedTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1724,12 +1733,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1846,11 +1859,20 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else - ds_colormap = basecolormap->Maps, plane_shade = true; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + plane_shade = true; + } if (!plane_shade) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index cab97adfc..43590247e 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,8 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + 
R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -314,10 +313,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -632,10 +630,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1435,11 +1432,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; - dc_light = 0; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1508,8 +1505,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1569,7 +1565,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - 
mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1580,7 +1576,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } @@ -1596,7 +1592,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); + mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1609,8 +1605,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1844,10 +1839,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3244,14 +3238,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + 
R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) diff --git a/src/r_things.cpp b/src/r_things.cpp index f52c80376..98557817d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,8 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - dc_colormap = vis->Style.colormap; - dc_light = 0; + R_SetColorMapLight(vis->Style.colormap, 0, 0); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -539,14 +538,13 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -656,8 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- dc_colormap = spr->Style.colormap; - dc_light = 0; + R_SetColorMapLight(spr->Style.colormap, 0, 0); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 57fac3cda..ff0427b34 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,13 +171,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - dc_colormap = (lighttable_t *)translation; - dc_light = 0; + R_SetColorMapLight((lighttable_t *)translation, 0, 0); } else { - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } fixedcolormap = dc_colormap; From 41537a50ab9f9aeb5f07e121ed8d1396dd7d261a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 16:52:41 +0200 Subject: [PATCH 11/94] Fix true color light calculation bug for decals --- src/r_draw.cpp | 8 ++++---- src/r_drawt_rgba.cpp | 4 ++-- src/r_things.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5a314e640..c190c1e73 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -416,7 +416,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +481,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +545,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = 
shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +609,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 1725b80e4..60520783d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -413,7 +413,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -455,7 +455,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.cpp b/src/r_things.cpp index 98557817d..3fcefe038 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- dc_colormap += vis->ColormapNum << COLORMAPSHIFT; + R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -2704,9 +2704,9 @@ void R_DrawParticle_RGBA(vissprite_t *vis) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += spacing; From 672b80898b720c03ea10367259cc7b524cc4bead Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 19:26:27 +0200 Subject: [PATCH 12/94] Moved ColormapNum to visstyle_t and changed colormap to BaseColormap --- src/g_shared/a_artifacts.cpp | 3 +- src/r_defs.h | 3 +- src/r_things.cpp | 115 ++++++++++++++++++++--------------- src/r_things.h | 1 - 4 files changed, 70 insertions(+), 52 deletions(-) diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index d36cdfe65..777d6824a 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,8 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency } diff --git a/src/r_defs.h b/src/r_defs.h index f27ac2716..8a247a5c0 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1401,7 +1401,8 @@ typedef BYTE lighttable_t; // This could be wider for >8 bit display. 
// This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { - lighttable_t *colormap; + int ColormapNum; // Which colormap is rendered + lighttable_t *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_things.cpp b/src/r_things.cpp index 3fcefe038..6f8038148 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,7 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.colormap, 0, 0); + R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. - R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); + R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -654,7 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. - R_SetColorMapLight(spr->Style.colormap, 0, 0); + R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.colormap, cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1058,7 +1058,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->bSplitSprite = false; @@ -1110,7 +1110,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor // get light level if (fixedcolormap != NULL) { // fixed map - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else { @@ -1120,17 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.colormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = 0; } else { // diminished light - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = mybasecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; } } } @@ -1199,14 +1202,13 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = 
NULL; - vis->ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->pic = pic; vis->bIsVoxel = false; vis->bWallSprite = true; - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = basecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = basecolormap->Maps; vis->wallc = wallc; } @@ -1376,7 +1378,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double vis->yscale = float(pspriteyscale / tex->Scale.Y); vis->Translation = 0; // [RH] Use default colors vis->pic = tex; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (flip) { @@ -1426,7 +1428,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.colormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.ColormapNum = 0; } else { @@ -1436,35 +1439,39 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.colormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.ColormapNum = 0; } else { // local light - vis->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP (0, spriteshade) << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { - lighttable_t *oldcolormap = vis->Style.colormap; + BYTE oldcolormapnum = vis->Style.ColormapNum; + lighttable_t *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite 
(&vis->Style); - if (vis->Style.colormap != oldcolormap) + if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.colormap < SpecialColormaps[0].Colormap || - vis->Style.colormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || + vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.colormap < mybasecolormap->Maps || - vis->Style.colormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap < mybasecolormap->Maps || + vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) { noaccel = true; } @@ -1472,8 +1479,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) { noaccel = true; } @@ -1495,7 +1502,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.colormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1641,18 +1649,18 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) { // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.colormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap); + ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; special = &SpecialColormaps[specialmap]; } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) { overlay = colormap->Fade; - overlay.a = BYTE(((vis->Style.colormap - colormap->Maps) >> 8) * 255 / NUMCOLORMAPS); + overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); } else { @@ -1660,7 +1668,7 @@ void R_DrawRemainingPlayerSprites() colormapstyle.Color = colormap->Color; colormapstyle.Fade = colormap->Fade; colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = ((vis->Style.colormap - colormap->Maps) >> 8) / float(NUMCOLORMAPS); + colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); } screen->DrawTexture(vis->pic, viewwindowx + VisPSpritesX1[i], @@ -1904,7 +1912,8 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.colormap; + lighttable_t *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2001,17 +2010,19 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.colormap = mybasecolormap->Maps + fixedlightlev; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.colormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP ( - r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT); + 
spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } } @@ -2159,7 +2170,8 @@ void R_DrawSprite (vissprite_t *spr) if (topclip >= botclip) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } @@ -2289,7 +2301,8 @@ void R_DrawSprite (vissprite_t *spr) } if (i == x2) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } } @@ -2307,7 +2320,8 @@ void R_DrawSprite (vissprite_t *spr) int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); } - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; } // kg3D: @@ -2551,25 +2565,28 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, vis->renderflags = particle->trans; vis->FakeFlatStat = fakeside; vis->floorclip = 0; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (fixedlightlev >= 0) { - vis->Style.colormap = map + fixedlightlev; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (fixedcolormap) { - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else if (particle->bright) { - vis->Style.colormap = map; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = 0; } else { // Particles are slightly more visible than regular sprites. 
- vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->Style.colormap = map + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); + vis->Style.BaseColormap = map; } } @@ -2602,7 +2619,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2668,7 +2685,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2676,7 +2693,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.h b/src/r_things.h index 057b7cfe2..785729b09 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -86,7 +86,6 @@ struct vissprite_t BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on - BYTE ColormapNum; // Which colormap is rendered (needed for shaded drawer) short renderflags; DWORD Translation; // [RH] for color translation visstyle_t Style; From 02a39ef4576204463474a71beef359848cac032d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 20:05:08 +0200 Subject: [PATCH 13/94] Added bgra support to SDL target --- src/posix/sdl/sdlvideo.cpp | 46 ++++++++++++++++++++++++++++---------- 1 file 
changed, 34 insertions(+), 12 deletions(-) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index b050097be..26121aa71 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -28,7 +28,7 @@ class SDLFB : public DFrameBuffer { DECLARE_CLASS(SDLFB, DFrameBuffer) public: - SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin); + SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin); ~SDLFB (); bool Lock (bool buffer); @@ -271,7 +271,8 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo { // Reuse the old framebuffer if its attributes are the same SDLFB *fb = static_cast (old); if (fb->Width == width && - fb->Height == height) + fb->Height == height && + fb->Bgra == bgra) { bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; @@ -296,7 +297,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo flashAmount = 0; } - SDLFB *fb = new SDLFB (width, height, fullscreen, oldwin); + SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin); // If we could not create the framebuffer, try again with slightly // different parameters in this order: @@ -350,8 +351,8 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- -SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height, false) +SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin) + : DFrameBuffer (width, height, bgra) { int i; @@ -494,7 +495,21 @@ void SDLFB::Update () pitch = Surface->pitch; } - if (NotPaletted) + if (Bgra) + { + if (pitch == Pitch * 4) + { + memcpy(pixels, MemBuffer, Width*Height*4); + } + else + { + for (int y = 0; y < Height; ++y) + { + memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); + } + } + } + else if (NotPaletted) { GPfx.Convert (MemBuffer, Pitch, pixels, 
pitch, Width, Height, @@ -674,13 +689,20 @@ void SDLFB::ResetSDLRenderer () SDL_SetRenderDrawColor(Renderer, 0, 0, 0, 255); Uint32 fmt; - switch(vid_displaybits) + if (Bgra) { - default: fmt = SDL_PIXELFORMAT_ARGB8888; break; - case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; - case 24: fmt = SDL_PIXELFORMAT_RGB888; break; - case 16: fmt = SDL_PIXELFORMAT_RGB565; break; - case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + fmt = SDL_PIXELFORMAT_ARGB8888; + } + else + { + switch (vid_displaybits) + { + default: fmt = SDL_PIXELFORMAT_ARGB8888; break; + case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; + case 24: fmt = SDL_PIXELFORMAT_RGB888; break; + case 16: fmt = SDL_PIXELFORMAT_RGB565; break; + case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + } } Texture = SDL_CreateTexture (Renderer, fmt, SDL_TEXTUREACCESS_STREAMING, Width, Height); From 7142faf41d8cd250a19cd86af9becd0ae2d79e32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 21:39:44 +0200 Subject: [PATCH 14/94] Minor compile error fixes when X86_ASM is defined --- src/r_draw.cpp | 6 +++--- src/r_draw.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index c190c1e73..2c2c67ad6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3025,9 +3025,11 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C +static int vlinebits; +static int mvlinebits; + #ifndef X86_ASM static DWORD vlinec1 (); -static int vlinebits; DWORD (*dovline1)() = vlinec1; DWORD (*doprevline1)() = vlinec1; @@ -3043,7 +3045,6 @@ void (*dovline4)() = vlinec4; static DWORD mvlinec1(); static void mvlinec4(); -static int mvlinebits; DWORD (*domvline1)() = mvlinec1; void (*domvline4)() = mvlinec4; @@ -4532,7 +4533,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = rt_map4cols_c; 
rt_subclamp4cols = rt_subclamp4cols_c; rt_revsubclamp4cols = rt_revsubclamp4cols_c; rt_tlate4cols = rt_tlate4cols_c; diff --git a/src/r_draw.h b/src/r_draw.h index f60b2299e..fcaedff47 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -265,6 +265,8 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +#endif + void R_DrawColumnHorizP_RGBA_C (void); void R_DrawColumnP_RGBA_C (void); void R_DrawFuzzColumnP_RGBA_C (void); @@ -274,8 +276,6 @@ void R_DrawSpanP_RGBA_C (void); void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); -#endif - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); From 6160675e080355f35206af2164306a29be3af4be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 12:42:08 +0200 Subject: [PATCH 15/94] Added a few more SSE drawers --- src/r_draw.cpp | 17 ++- src/r_draw.h | 4 + src/r_drawt_rgba.cpp | 273 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2c2c67ad6..aed4bbeea 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4364,7 +4364,6 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; #ifndef NO_SSE R_DrawSpan = R_DrawSpanP_RGBA_SSE; #else @@ -4409,9 +4408,6 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; rt_add1col = rt_add1col_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; @@ -4422,7 +4418,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; 
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; rt_subclamp4cols = rt_subclamp4cols_RGBA_c; rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; rt_tlate4cols = rt_tlate4cols_RGBA_c; @@ -4432,6 +4427,18 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; +#ifndef NO_SSE + rt_map4cols = rt_map4cols_RGBA_SSE; + rt_add4cols = rt_add4cols_RGBA_SSE; + rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; + rt_shaded4cols = rt_shaded4cols_RGBA_SSE; +#else + rt_map4cols = rt_map4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; +#endif + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; diff --git a/src/r_draw.h b/src/r_draw.h index fcaedff47..27a985dcb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -173,6 +173,7 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); +void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -187,8 +188,11 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 60520783d..d390fc54d 100644 --- a/src/r_drawt_rgba.cpp +++ 
b/src/r_drawt_rgba.cpp @@ -42,6 +42,9 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#ifndef NO_SSE +#include +#endif uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; uint32_t *dc_temp_rgba; @@ -185,6 +188,98 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Maps all four spans to the screen starting at sx. +void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = 
_mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[4]]; + uint32_t p1 = colormap[source[5]]; + uint32_t p2 = colormap[source[6]]; + uint32_t p3 = colormap[source[7]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); +} + void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; @@ -380,6 +475,69 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx without clamping. 
+#ifndef NO_SSE +void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + 
_mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -481,6 +639,58 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Shades all four spans to the screen starting at sx. +#ifndef NO_SSE +void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} 
+#endif + // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -572,6 +782,69 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx with clamping. +#ifndef NO_SSE +void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { From 373b59b94fa93b78527c50a0af9aea84e09a569b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 14:06:44 +0200 Subject: [PATCH 16/94] Fix dovline4 being a define on X64_ASM --- src/r_draw.cpp | 2 +- src/r_draw.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index aed4bbeea..ccaa864e6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3036,8 +3036,8 @@ DWORD (*doprevline1)() = vlinec1; #ifdef X64_ASM extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 extern "C" void setupvlinetallasm (int); +void (*dovline4)() = vlinetallasm4; #else static void vlinec4 (); void (*dovline4)() = vlinec4; diff --git a/src/r_draw.h b/src/r_draw.h index 27a985dcb..2eefff9bd 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -71,12 +71,7 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else extern void (*dovline4) (); -#endif extern void setupvline (int); extern DWORD (*domvline1) (); From af02bafdeb4a96e091f6ff8608d3d82278bf7c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 22:57:36 +0200 Subject: [PATCH 17/94] Fixed missing some columns in transparency rendering --- src/r_draw.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ccaa864e6..ec0645fd2 100644 
--- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -952,12 +952,14 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; From 0c8c9e0aeace39987a44183ff16670e5ea967007 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 14:08:03 +0200 Subject: [PATCH 18/94] Added FDynamicColormap support to true color mode --- src/g_level.cpp | 2 +- src/g_shared/a_artifacts.cpp | 2 +- src/r_data/colormaps.cpp | 20 +- src/r_data/colormaps.h | 25 +- src/r_defs.h | 3 +- src/r_draw.cpp | 592 ++++++++++++++++++++++++----------- src/r_draw.h | 19 +- src/r_drawt_rgba.cpp | 427 +++++++++++++++---------- src/r_main.cpp | 8 +- src/r_main.h | 141 ++++++++- src/r_plane.cpp | 12 +- src/r_segs.cpp | 45 ++- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 79 +++-- src/r_utility.cpp | 4 +- src/v_draw.cpp | 13 +- 16 files changed, 937 insertions(+), 457 deletions(-) diff --git a/src/g_level.cpp b/src/g_level.cpp index 141932c22..d27747ccb 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1307,7 +1307,7 @@ void G_InitLevelLocals () level_info_t *info; BaseBlendA = 0.0f; // Remove underwater blend effect, if any - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; // [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights. 
NormalLight.ChangeColor (PalEntry (255, 255, 255), 0); diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index 777d6824a..305260ebf 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,7 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index b46342463..ffaaa38ac 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -BYTE *realcolormaps; +FColormap realcolormaps; size_t numfakecmaps; @@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name) foo.Color = 0xFFFFFF; foo.Fade = 0; - foo.Maps = realcolormaps; + foo.Maps = realcolormaps.Maps; foo.Desaturate = 0; foo.Next = NULL; foo.BuildLights (); @@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name) remap[0] = 0; for (i = 0; i < NUMCOLORMAPS; ++i) { - BYTE *map2 = &realcolormaps[i*256]; + BYTE *map2 = &realcolormaps.Maps[i*256]; lumpr.Read (map, 256); for (j = 0; j < 256; ++j) { @@ -454,11 +454,7 @@ void R_DeinitColormaps () { SpecialColormaps.Clear(); fakecmaps.Clear(); - if (realcolormaps != NULL) - { - delete[] realcolormaps; - realcolormaps = NULL; - } + delete[] realcolormaps.Maps; FreeSpecialLights(); } @@ -501,7 +497,7 @@ void R_InitColormaps () } } } - realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; + realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; R_SetDefaultColormap ("COLORMAP"); if (fakecmaps.Size() > 1) @@ -523,7 +519,7 @@ void R_InitColormaps () { int k, r, g, b; FWadLump lump = Wads.OpenLumpNum 
(fakecmaps[j].lump); - BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j; + BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j; for (k = 0; k < NUMCOLORMAPS; ++k) { @@ -550,8 +546,8 @@ void R_InitColormaps () } NormalLight.Color = PalEntry (255, 255, 255); NormalLight.Fade = 0; - NormalLight.Maps = realcolormaps; - NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps); + NormalLight.Maps = realcolormaps.Maps; + NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps); numfakecmaps = fakecmaps.Size(); // build default special maps (e.g. invulnerability) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 0764191a3..bda6a5ea4 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,18 +1,26 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H +struct FColormap; + void R_InitColormaps (); void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern BYTE *realcolormaps; // [RH] make the colormaps externally visible +extern FColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; +struct FColormap +{ + BYTE *Maps = nullptr; + PalEntry Color = 0xffffffff; + PalEntry Fade = 0xff000000; + int Desaturate = 0; +}; - -struct FDynamicColormap +struct FDynamicColormap : FColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); @@ -20,10 +28,6 @@ struct FDynamicColormap void BuildLights (); static void RebuildAllLights(); - BYTE *Maps; - PalEntry Color; - PalEntry Fade; - int Desaturate; FDynamicColormap *Next; }; @@ -43,8 +47,13 @@ enum }; -struct FSpecialColormap +struct FSpecialColormap : FColormap { + FSpecialColormap() + { + Maps = Colormap; + } + float ColorizeStart[3]; float ColorizeEnd[3]; BYTE Colormap[256]; diff --git 
a/src/r_defs.h b/src/r_defs.h index 8a247a5c0..c0f878664 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1397,12 +1397,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. +struct FColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - lighttable_t *BaseColormap; // Base colormap used together with ColormapNum + FColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec0645fd2..2e21c7038 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -145,6 +145,8 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +FColormap *dc_fcolormap; +ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; int dc_yl; @@ -179,6 +181,7 @@ BYTE *dc_translation; BYTE shadetables[NUMCOLORMAPS*16*256]; FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; +FDynamicColormap identitycolormap; EXTERN_CVAR (Int, r_columnmethod) @@ -219,6 +222,10 @@ void R_InitShadeMaps() { identitymap[i] = i; } + identitycolormap.Color = ~0u; + identitycolormap.Desaturate = 0; + identitycolormap.Next = NULL; + identitycolormap.Maps = identitymap; } /************************************/ @@ -297,6 +304,7 @@ void R_DrawColumnP_RGBA_C() dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. @@ -315,9 +323,7 @@ void R_DrawColumnP_RGBA_C() // This is as fast as it gets. do { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); dest += pitch; frac += fracstep; @@ -371,7 +377,7 @@ void R_FillColumnP_RGBA() do { - *dest = shade_pal_index(color, light); + *dest = shade_pal_index_simple(color, light); dest += pitch; } while (--count); } @@ -416,7 +422,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +487,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +551,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +615,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -953,13 +959,14 @@ void R_DrawAddColumnP_RGBA_C() BYTE *colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = 
dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1032,6 +1039,7 @@ void R_DrawTranslatedColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1040,14 +1048,13 @@ void R_DrawTranslatedColumnP_RGBA_C() { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; BYTE *translation = dc_translation; const BYTE *source = dc_source; int pitch = dc_pitch; do { - *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); dest += pitch; frac += fracstep; } while (--count); @@ -1106,6 +1113,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1114,7 +1122,6 @@ void R_DrawTlatedAddColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; @@ -1123,7 +1130,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1197,7 +1204,7 @@ void R_DrawShadedColumnP_RGBA_C() fracstep = dc_iscale; frac = dc_texturefrac; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1286,16 +1293,16 @@ void R_DrawAddClampColumnP_RGBA_C() { const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1375,17 +1382,17 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1462,17 +1469,17 @@ void R_DrawSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1551,17 +1558,17 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1638,16 +1645,16 @@ void R_DrawRevSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1726,17 +1733,17 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1784,8 +1791,10 @@ int ds_y; int ds_x1; int ds_x2; +FColormap* ds_fcolormap; lighttable_t* ds_colormap; -//dsfixed_t ds_light; +ShadeConstants ds_shade_constants; +dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1835,9 +1844,9 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(BYTE *colormap) +void R_SetSpanColormap(FDynamicColormap *colormap, int shade) { - R_SetDSColorMapLight(colormap, 0, 0); + R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -1956,7 +1965,6 @@ void R_DrawSpanP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -1980,6 +1988,7 @@ void R_DrawSpanP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { @@ -1990,9 +1999,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2010,9 +2018,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2030,7 +2037,6 @@ void R_DrawSpanP_RGBA_SSE() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2054,54 +2060,92 @@ void R_DrawSpanP_RGBA_SSE() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; int sse_count = count / 4; count -= sse_count * 4; - while (sse_count--) + + if (shade_constants.simple_shade) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + SSE_SHADE_SIMPLE_INIT(light); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Next step in u,v. - dest += 4; + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + if (count == 0) return; @@ -2110,9 +2154,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2130,9 +2173,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -2221,6 +2263,7 @@ void R_DrawSpanMaskedP_RGBA_C() int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2243,7 +2286,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2263,7 +2306,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2343,7 +2386,6 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2358,6 +2400,7 @@ void R_DrawSpanTranslucentP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2369,7 +2412,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2397,7 +2440,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2502,11 +2545,11 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const 
BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2532,7 +2575,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2565,7 +2608,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2665,11 +2708,11 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2691,7 +2734,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2719,7 +2762,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = 
(fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2830,11 +2873,11 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2860,7 +2903,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2893,7 +2936,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2926,7 +2969,7 @@ void R_FillSpan_RGBA() uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3147,7 +3190,6 @@ DWORD vlinec1_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3155,10 +3197,11 @@ DWORD vlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { - *dest = shade_pal_index(colormap[source[frac >> bits]], light); + *dest = 
shade_pal_index(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -3197,12 +3240,14 @@ void vlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3219,40 +3264,64 @@ void vlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if 
(shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; - BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; - BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; - BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = 
local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3323,13 +3392,14 @@ DWORD mvlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { BYTE pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(colormap[pix], light); + *dest = shade_pal_index(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -3370,13 +3440,15 @@ void mvlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = 
vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3393,48 +3465,70 @@ void mvlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + // movemask = !(pix == 0) + __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - BYTE p0 = palookupoffse[0][pix0]; - BYTE p1 = palookupoffse[1][pix1]; - BYTE p2 = palookupoffse[2][pix2]; - BYTE p3 = palookupoffse[3][pix3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + 
local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3503,7 +3597,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3528,7 +3622,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); } else { @@ -3578,6 +3672,7 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; for (; y < y2; ++y) { @@ -3585,7 +3680,37 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) int x = x1; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * 
inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); dest += dc_pitch; } @@ -3598,10 +3723,41 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } @@ -3627,7 +3783,9 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + 
R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); for (--x; x >= x1; --x) { @@ -3652,11 +3810,12 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else { - if (dc_colormap != basecolormapdata) + if (fake_dc_colormap != basecolormapdata) { stop = MIN(t1, b2); while (t2 < stop) @@ -3741,7 +3900,6 @@ fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3749,6 +3907,7 @@ fixed_t tmvline1_add_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3758,7 +3917,7 @@ fixed_t tmvline1_add_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3825,6 +3984,8 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3835,7 +3996,7 @@ void tmvline4_add_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], 
light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3898,7 +4059,6 @@ fixed_t tmvline1_addclamp_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3906,6 +4066,7 @@ fixed_t tmvline1_addclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3915,7 +4076,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3981,6 +4142,8 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3991,7 +4154,7 @@ void tmvline4_addclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4059,6 +4222,7 @@ fixed_t tmvline1_subclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4068,7 +4232,7 @@ 
fixed_t tmvline1_subclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4133,6 +4297,8 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4143,7 +4309,7 @@ void tmvline4_subclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4211,6 +4377,7 @@ fixed_t tmvline1_revsubclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4220,7 +4387,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4285,6 +4452,8 @@ void tmvline4_revsubclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4295,7 +4464,7 @@ void tmvline4_revsubclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if 
(pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4793,15 +4962,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); } else { - R_SetColorMapLight(colormap, 0, 0); + R_SetColorMapLight(basecolormap, 0, 0); } return r_columnmethod ? DoDraw1 : DoDraw0; } @@ -4827,7 +4996,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(identitymap, 0, 0); + R_SetColorMapLight(&identitycolormap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) @@ -4871,30 +5040,77 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } -void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetTranslationMap(lighttable_t *translation) { + dc_fcolormap = nullptr; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; if (r_swtruecolor) { - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, shade); + dc_colormap = translation; + dc_light = 0; } else { - dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_colormap = translation; dc_light = 0; } } -void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { + dc_fcolormap = base_colormap; + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + 
dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { - ds_colormap = basecolormapdata; + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +{ + ds_fcolormap = base_colormap; + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + if (r_swtruecolor) + { + ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { - ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); ds_light = 0; } } diff --git a/src/r_draw.h b/src/r_draw.h index 2eefff9bd..cc3b10935 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,11 +25,16 @@ #include "r_defs.h" +struct FColormap; +struct ShadeConstants; + extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" FColormap *dc_fcolormap; +extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; 
extern "C" int dc_yl; @@ -93,7 +98,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); +void R_SetSpanColormap(FDynamicColormap *colormap, int shade); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -321,9 +326,10 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; +extern "C" FColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; -//extern "C" dsfixed_t ds_light; -#define ds_light dc_light +extern "C" ShadeConstants ds_shade_constants; +extern "C" dsfixed_t ds_light; extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; @@ -341,6 +347,7 @@ extern "C" int ds_color; // [RH] For flat color (no texturing) extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; extern FDynamicColormap ShadeFakeColormap[16]; extern BYTE identitymap[256]; +extern FDynamicColormap identitycolormap; extern BYTE *dc_translation; // [RH] Added for muliresolution support @@ -389,9 +396,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); + +void R_SetTranslationMap(lighttable_t *translation); #endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d390fc54d..ff5c0d82f 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -108,7 +108,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -120,14 +119,14 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light); + *dest = shade_pal_index(*source, light, shade_constants); source += 4; dest += pitch; } @@ -135,8 +134,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -145,7 +144,6 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -157,17 +155,17 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); source += 4; dest += pitch; } @@ -175,14 +173,14 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -191,7 +189,6 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -202,82 +199,114 @@ void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) return; count++; + ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); + SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[4]]; - uint32_t p1 = colormap[source[5]]; - uint32_t p2 = colormap[source[6]]; - uint32_t p3 = colormap[source[7]]; + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); - fg = _mm_packus_epi16(fg_lo, fg_hi); - 
_mm_storeu_si128((__m128i*)(dest + pitch), fg); + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - source += 8; - dest += pitch * 2; - } while (--count); + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) @@ -385,7 +414,6 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -399,15 +427,15 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -430,7 +458,6 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -444,9 +471,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -454,7 +481,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -479,7 +506,6 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -493,7 +519,6 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -501,40 +526,80 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - 
uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -571,7 +636,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -613,7 +678,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -659,7 +724,7 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -694,7 +759,6 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -708,15 +772,15 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -738,7 +802,6 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -752,9 +815,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -762,7 +825,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -786,7 +849,6 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int 
count; @@ -800,7 +862,6 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -808,40 +869,80 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * 
fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, 
_mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -862,7 +963,6 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -876,15 +976,15 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -906,7 +1006,6 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -920,9 +1019,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -930,7 +1029,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -968,7 +1067,6 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -982,15 +1080,15 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1012,7 +1110,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -1026,9 +1123,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -1036,7 +1133,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_main.cpp b/src/r_main.cpp index aaf8fc532..a795f8016 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -119,7 +119,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -lighttable_t *fixedcolormap; +FColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; @@ -464,11 +464,11 @@ void R_SetupColormap(player_t *player) // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the // palette. 
- fixedcolormap = realcolormaps; + fixedcolormap = &realcolormaps; } else { - fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap; + fixedcolormap = &SpecialColormaps[player->fixedcolormap]; } } else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) @@ -479,7 +479,7 @@ void R_SetupColormap(player_t *player) // [RH] Inverse light for shooting the Sigil if (fixedcolormap == NULL && extralight == INT_MIN) { - fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; extralight = 0; } } diff --git a/src/r_main.h b/src/r_main.h index e8be3c1a3..0db704df1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,25 +90,162 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; + // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { return 256 - (light >> (FRACBITS - 8)); } +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) +#else +#define FORCEINLINE inline +#endif +#endif + // Calculates a ARGB8 color for the given palette index and light multiplier -inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) { const PalEntry &color = GPalette.BaseColors[index]; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light 
/ 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ 
+ \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + extern bool r_swtruecolor; extern double GlobVis; @@ -125,7 +262,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern lighttable_t* fixedcolormap; +extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 9805ab200..26d579d6d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,7 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); + R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM @@ -616,7 +616,7 @@ void R_MapColoredPlane_RGBA(int y, int x1) uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -1598,7 +1598,7 @@ void R_DrawSkyPlane (visplane_t *pl) else { fakefixed = true; - fixedcolormap = NormalLight.Maps; + fixedcolormap = &NormalLight; R_SetColorMapLight(fixedcolormap, 0, 0); } @@ -1683,7 +1683,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1860,7 +1860,7 @@ void 
R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1870,7 +1870,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a } else { - R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + R_SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 43590247e..bd2c7d22b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,7 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -313,7 +313,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -630,7 +630,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -1126,6 +1126,11 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (x & 3); ++x) { 
light += rw_lightstep; @@ -1137,7 +1142,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1241,7 +1246,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1496,6 +1501,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1605,7 +1615,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,6 +1700,11 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1699,7 +1714,7 
@@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1801,7 +1816,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1839,7 +1854,7 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -3238,11 +3253,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -3293,7 +3308,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3303,7 +3318,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - 
R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3318,7 +3333,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index ee6ac5fed..645741a2a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -323,7 +323,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - unsigned char *savecolormap = fixedcolormap; + FColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 6f8038148..c132cc2fd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -538,11 +538,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -593,7 +593,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -604,7 +604,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -620,7 +620,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1121,19 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = 0; } else { // diminished light vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; } } } @@ -1208,7 +1208,7 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->bWallSprite = true; vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; } @@ -1428,7 +1428,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; } else @@ -1439,39 +1439,38 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; 
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; } else { // local light - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { BYTE oldcolormapnum = vis->Style.ColormapNum; - lighttable_t *oldcolormap = vis->Style.BaseColormap; + FColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || - vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < &SpecialColormaps[0] || + vis->Style.BaseColormap > &SpecialColormaps.Last()) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.BaseColormap < mybasecolormap->Maps || - vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap != mybasecolormap) { noaccel = true; } @@ -1479,13 +1478,13 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps) + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } @@ -1502,7 +1501,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1649,12 +1648,10 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; - special = &SpecialColormaps[specialmap]; + special = static_cast<FSpecialColormap*>(vis->Style.BaseColormap); } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) @@ -1912,7 +1909,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.BaseColormap; + FColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2010,18 +2007,18 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } @@ -2438,7 +2435,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - BYTE* map; + FColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2511,7 +2508,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->ceilingplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::ceiling); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else if
(fakeside == FAKED_BelowFloor) { @@ -2519,7 +2516,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = heightsec->GetTexture(sector_t::floor); botpic = sector->GetTexture(sector_t::floor); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else { @@ -2527,7 +2524,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->floorplane; toppic = heightsec->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } } else @@ -2536,7 +2533,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &sector->floorplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = sector->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos)) @@ -2619,7 +2616,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2685,7 +2682,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.BaseColormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2693,7 +2690,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t
fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 71d3f2376..efc901ca5 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -889,11 +889,11 @@ void R_SetupFrame (AActor *actor) BaseBlendG = GPART(newblend); BaseBlendB = BPART(newblend); BaseBlendA = APART(newblend) / 255.f; - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; } else { - NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend; + NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend; BaseBlendR = BaseBlendG = BaseBlendB = 0; BaseBlendA = 0.f; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ff0427b34..c2dbf31c5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,14 +171,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetColorMapLight((lighttable_t *)translation, 0, 0); + R_SetTranslationMap((lighttable_t *)translation); } else { - R_SetColorMapLight(identitymap, 0, 0); + R_SetTranslationMap(identitymap); } - fixedcolormap = dc_colormap; + fixedcolormap = dc_fcolormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; @@ -1025,7 +1025,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1394,7 +1394,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); - R_SetSpanColormap(colormap != NULL ? 
&colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); + if (colormap) + R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + else + R_SetSpanColormap(&identitycolormap, 0); R_SetSpanSource(tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; From c058ab9cc9ac17a23a86964387e9f138359935f1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 17:34:51 +0200 Subject: [PATCH 19/94] Fixed non-standard __m128i usage in SSE_SHADE --- src/r_main.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/r_main.h b/src/r_main.h index 0db704df1..5266fb52c 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -226,8 +226,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ \ __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ \ fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ @@ -235,8 +235,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t 
light, const Shade fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ \ __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ \ fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ From c176d38b7e8992f006a99950a249848ba0a0f039 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 5 Jun 2016 19:41:08 +0300 Subject: [PATCH 20/94] Fixed compilation with Clang --- src/r_draw.cpp | 4 ++-- src/r_main.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2e21c7038..e809342e9 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -5076,7 +5076,7 @@ void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) dc_shade_constants.fade_green = dc_fcolormap->Fade.g; dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && 
dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { @@ -5101,7 +5101,7 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_shade_constants.fade_green = ds_fcolormap->Fade.g; ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { diff --git a/src/r_main.h b/src/r_main.h index 5266fb52c..5d4ff1174 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -115,7 +115,7 @@ inline uint32_t calc_light_multiplier(dsfixed_t light) #if defined(_MSC_VER) #define FORCEINLINE __forceinline #elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) +#define FORCEINLINE __attribute__((always_inline)) inline #else #define FORCEINLINE inline #endif From c5fcfb664f210996eadc22e20c036b50b4064abb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 00:55:52 +0200 Subject: [PATCH 21/94] Changed rgba renderer to use a command queue system for its drawers --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2273 +-------------------------- src/r_draw.h | 95 +- src/r_draw_rgba.cpp | 3492 ++++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 8 + src/r_drawt_rgba.cpp | 2633 ++++++++++++++++++------------- src/r_main.cpp | 2 + src/r_swrenderer.cpp | 1 + src/r_things.cpp | 5 +- 9 files changed, 5189 insertions(+), 3321 deletions(-) create mode 100644 src/r_draw_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c90756b5d..75cf27cad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -882,6 +882,7 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp + r_draw_rgba.cpp 
r_drawt.cpp r_drawt_rgba.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e809342e9..984a74f3f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -43,9 +43,6 @@ #include "gi.h" #include "stats.h" #include "x86.h" -#ifndef NO_SSE -#include <emmintrin.h> -#endif #undef RANGECHECK @@ -135,6 +132,7 @@ void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); +void (*rt_span_coverage)(int x, int start, int stop); // // R_DrawColumn @@ -287,51 +285,6 @@ void R_DrawColumnP_C (void) } #endif -void R_DrawColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets.
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - // [RH] Just fills a column with a color void R_FillColumnP_C (void) { @@ -357,32 +310,6 @@ void R_FillColumnP_C (void) } } -void R_FillColumnP_RGBA() -{ - int count; - uint32_t* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = shade_pal_index_simple(color, light); - dest += pitch; - } while (--count); - } -} - void R_FillAddColumn_C (void) { int count; @@ -410,38 +337,6 @@ void R_FillAddColumn_C (void) } while (--count); } -void R_FillAddColumn_RGBA_C() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillAddClampColumn_C (void) { int count; @@ -475,38 +370,6 @@ void R_FillAddClampColumn_C (void) } while (--count); } -void R_FillAddClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - 
do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillSubClampColumn_C (void) { int count; @@ -539,38 +402,6 @@ void R_FillSubClampColumn_C (void) } while (--count); } -void R_FillSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillRevSubClampColumn_C (void) { int count; @@ -603,42 +434,9 @@ void R_FillRevSubClampColumn_C (void) } while (--count); } -void R_FillRevSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = 
clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - // // Spectre/Invisibility. // -#define FUZZTABLE 50 extern "C" { @@ -754,105 +552,6 @@ void R_DrawFuzzColumnP_C (void) } #endif -void R_DrawFuzzColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; - - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. - - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. 
- if (fuzz) - { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - // // R_DrawTranlucentColumn // @@ -937,56 +636,6 @@ void R_DrawAddColumnP_C (void) } } -void R_DrawAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - BYTE *colormap = dc_colormap; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // // R_DrawTranslatedColumn // Used to draw player sprites with the green colorramp mapped to others. @@ -1027,40 +676,6 @@ void R_DrawTranslatedColumnP_C (void) } } -void R_DrawTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column that is both translated and translucent void R_DrawTlatedAddColumnP_C() { @@ -1101,56 +716,6 @@ void R_DrawTlatedAddColumnP_C() } } -void R_DrawTlatedAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - 
- dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column whose "color" values are actually translucency // levels for a base color stored in dc_color. 
void R_DrawShadedColumnP_C (void) @@ -1188,52 +753,6 @@ void R_DrawShadedColumnP_C (void) } } -void R_DrawShadedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { @@ -1275,53 +794,6 @@ void R_DrawAddClampColumnP_C () } } -void R_DrawAddClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 
0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { @@ -1364,54 +836,6 @@ void R_DrawAddClampTranslatedColumnP_C () } } -void R_DrawAddClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // 
Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { @@ -1452,53 +876,6 @@ void R_DrawSubClampColumnP_C () } } -void R_DrawSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { @@ -1540,54 +917,6 @@ void R_DrawSubClampTranslatedColumnP_C () } } -void R_DrawSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = 
calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { @@ -1628,52 +957,6 @@ void R_DrawRevSubClampColumnP_C () } } -void R_DrawRevSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - 
uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { @@ -1715,55 +998,6 @@ void R_DrawRevSubClampTranslatedColumnP_C () } } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - // // R_DrawSpan // 
With DOOM style restrictions on view orientation, @@ -1957,233 +1191,6 @@ void R_DrawSpanP_C (void) } #endif -void R_DrawSpanP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -#ifndef NO_SSE -void R_DrawSpanP_RGBA_SSE() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. 
- dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -#endif - #ifndef X86_ASM // [RH] Draw a span with holes @@ -2250,72 +1257,6 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanMaskedP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - - void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -2378,89 +1319,6 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanMaskedTranslucentP_C (void) { dsfixed_t xfrac; @@ -2537,99 +1395,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void 
R_DrawSpanMaskedTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 
0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanAddClampP_C (void) { dsfixed_t xfrac; @@ -2700,88 +1465,6 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} void R_DrawSpanMaskedAddClampP_C (void) { @@ -2865,114 +1548,12 @@ void R_DrawSpanMaskedAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * 
fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - // [RH] Just fill a span with a color void R_FillSpan_C (void) { memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); } -void R_FillSpan_RGBA() -{ - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; -} // Draw a voxel slab // @@ -3070,8 +1651,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C -static int vlinebits; -static int mvlinebits; +int vlinebits; +int mvlinebits; #ifndef X86_ASM static DWORD vlinec1 (); @@ -3186,29 +1767,6 @@ DWORD vlinec1 () } #endif -DWORD vlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void vlinec4 () { @@ -3228,113 +1786,6 @@ void vlinec4 () } #endif -void vlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - dest[0] = 
shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void vlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. - vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - void setupmvline (int fracbits) { if (!r_swtruecolor) @@ -3380,34 +1831,6 @@ DWORD mvlinec1 () } #endif -DWORD mvlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_pal_index(pix, light, shade_constants); - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void mvlinec4 () { @@ -3428,121 +1851,6 @@ void mvlinec4 () } #endif -void mvlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = 
calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void mvlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. 
- vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -3666,196 +1974,6 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } } -static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine_RGBA(int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t 
light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); -} - -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine_RGBA(t2++, xr); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - R_DrawFogBoundaryLine_RGBA(--b2, xr); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); - } -} - - int tmvlinebits; void setuptmvline (int bits) @@ -3896,49 +2014,6 @@ fixed_t tmvline1_add_C () return frac; } -fixed_t tmvline1_add_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = 
dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_add_C () { BYTE *dest = dc_dest; @@ -3972,51 +2047,6 @@ void tmvline4_add_C () } while (--count); } -void tmvline4_add_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - 
uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; @@ -4055,49 +2085,6 @@ fixed_t tmvline1_addclamp_C () return frac; } -fixed_t tmvline1_addclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_addclamp_C () { BYTE *dest = dc_dest; @@ -4130,51 +2117,6 @@ void tmvline4_addclamp_C () } 
while (--count); } -void tmvline4_addclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; @@ -4210,50 +2152,6 @@ fixed_t tmvline1_subclamp_C () return frac; } -fixed_t tmvline1_subclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = 
source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_subclamp_C () { BYTE *dest = dc_dest; @@ -4285,51 +2183,6 @@ void tmvline4_subclamp_C () } while (--count); } -void tmvline4_subclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = 
clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; @@ -4365,50 +2218,6 @@ fixed_t tmvline1_revsubclamp_C () return frac; } -fixed_t tmvline1_revsubclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_revsubclamp_C () { BYTE *dest = dc_dest; @@ -4440,52 +2249,6 @@ void tmvline4_revsubclamp_C () } while (--count); } -void tmvline4_revsubclamp_RGBA() -{ - uint32_t *dest = 
(uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - - //========================================================================== // // R_GetColumn @@ -4535,11 +2298,7 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; -#ifndef NO_SSE - R_DrawSpan = R_DrawSpanP_RGBA_SSE; -#else - R_DrawSpan = R_DrawSpanP_RGBA_C; -#endif + R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4579,9 +2338,13 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; 
rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; rt_add1col = rt_add1col_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_subclamp1col = rt_subclamp1col_RGBA_c; rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; rt_tlate1col = rt_tlate1col_RGBA_c; @@ -4597,31 +2360,14 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - -#ifndef NO_SSE - rt_map4cols = rt_map4cols_RGBA_SSE; - rt_add4cols = rt_add4cols_RGBA_SSE; - rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; - rt_shaded4cols = rt_shaded4cols_RGBA_SSE; -#else - rt_map4cols = rt_map4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; -#endif + rt_span_coverage = rt_span_coverage_rgba; dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; -#ifndef NO_SSE - dovline4 = vlinec4_RGBA_SSE; - domvline4 = mvlinec4_RGBA_SSE; -#else dovline4 = vlinec4_RGBA; domvline4 = mvlinec4_RGBA; -#endif - } else { @@ -4719,6 +2465,7 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + rt_span_coverage = rt_span_coverage_pal; if (pointers_saved) { diff --git a/src/r_draw.h b/src/r_draw.h index cc3b10935..98be57c51 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,6 +24,13 @@ #define __R_DRAW__ #include "r_defs.h" +#include + +// Spectre/Invisibility. 
+#define FUZZTABLE 50 +extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine +extern "C" int fuzzpos; +extern "C" int fuzzviewheight; struct FColormap; struct ShadeConstants; @@ -173,7 +180,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); -void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -188,11 +194,8 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); @@ -235,6 +238,7 @@ extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); extern void (*rt_initcols)(BYTE *buffer); +extern void (*rt_span_coverage)(int x, int start, int stop); void rt_draw4cols (int sx); @@ -242,6 +246,8 @@ void rt_draw4cols (int sx); void rt_initcols_pal (BYTE *buffer); void rt_initcols_rgba (BYTE *buffer); +void rt_span_coverage_pal(int x, int start, int stop); +void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -277,9 +283,40 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); +void 
R_DrawSpanTranslucentP_RGBA_C(); +void R_DrawSpanMaskedTranslucentP_RGBA_C(); +void R_DrawSpanAddClampP_RGBA_C(); +void R_DrawSpanMaskedAddClampP_RGBA_C(); +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA_C(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA_C(); +void R_DrawTlatedAddColumnP_RGBA_C(); +void R_DrawAddClampColumnP_RGBA_C(); +void R_DrawAddClampTranslatedColumnP_RGBA_C(); +void R_DrawSubClampColumnP_RGBA_C(); +void R_DrawSubClampTranslatedColumnP_RGBA_C(); +void R_DrawRevSubClampColumnP_RGBA_C(); +void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); +void R_FillSpan_RGBA(); +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); + void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -403,4 +440,52 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Wait until all drawers finished executing +void R_FinishDrawerCommands(); + +class DrawerThread // per-thread state handed to DrawerCommand::Execute +{ +public: + int core = 0; + int num_cores = 1; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; +}; + +class DrawerCommand // interface for a queued drawing operation; run by DrawerCommandQueue::Finish +{ +public: + virtual void Execute(DrawerThread *thread) = 0; // NOTE(review): destructor is not virtual, so Finish() only runs ~DrawerCommand, not derived destructors +}; + +class DrawerCommandQueue // fixed-size memory pool plus the list of commands constructed inside it +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector<DrawerCommand *> commands; + + static DrawerCommandQueue *Instance(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size);
+ + // Queue command to be executed by drawer worker threads. The command is silently dropped if the memory pool is exhausted. + template<typename T, typename... Types> + static void QueueCommand(Types &&... args) + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) // AllocMemory returns nullptr when the pool is full; check before constructing, never placement-new into nullptr + return; + T *command = new (ptr)T(std::forward<Types>(args)...); + Instance()->commands.push_back(command); + } + + // Wait until all worker threads finished executing commands + static void Finish(); +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp new file mode 100644 index 000000000..9e61bb427 --- /dev/null +++ b/src/r_draw_rgba.cpp @@ -0,0 +1,3492 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// True color span/column drawing functions.
+// +//----------------------------------------------------------------------------- + +#include <stddef.h> + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" +#ifndef NO_SSE +#include <emmintrin.h> +#endif +#include <vector> + +extern int vlinebits; +extern int mvlinebits; +extern int tmvlinebits; + +extern "C" short spanend[MAXHEIGHT]; +extern float rw_light; +extern float rw_lightstep; +extern int wallshade; + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() // returns the single function-local static queue +{ + static DrawerCommandQueue queue; + return &queue; +} + +void* DrawerCommandQueue::AllocMemory(size_t size) // bump-allocates from memorypool; returns nullptr if the request does not fit +{ + // Round the size up to a multiple of 16 so successive pool offsets stay 16-byte multiples + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Finish() // executes every queued command in order on the calling thread, destroys them, then resets the pool +{ + auto queue = Instance(); + + DrawerThread thread; + + size_t size = queue->commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->commands[i]; + command->Execute(&thread); + } + + for (auto &command : queue->commands) + command->~DrawerCommand(); + queue->commands.clear(); + queue->memorypool_pos = 0; +} + +///////////////////////////////////////////////////////////////////////////// + +class DrawColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_texturefrac; + fixed_t dc_iscale; + fixed_t dc_light; + const BYTE *dc_source; + int dc_pitch; + ShadeConstants dc_shade_constants; + +public: + DrawColumnRGBACommand() + { +
dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_texturefrac = ::dc_texturefrac; + dc_iscale = ::dc_iscale; + dc_light = ::dc_light; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override // writes dc_count shaded texels down one column, advancing dest by dc_pitch per pixel + { + int count; + uint32_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = (uint32_t*)dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + // Determine scaling, + // which is the only mapping to be done. + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-like scaling. + // This is as fast as it gets.
+ do + { + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + + dest += pitch; + frac += fracstep; + + } while (--count); + } + } +}; + +class FillColumnRGBACommand : public DrawerCommand // fills a column with one light-shaded palette colour +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_light; + int dc_pitch; + int dc_color; + +public: + FillColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_light = ::dc_light; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t* dest; + + count = dc_count; + + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + + { + int pitch = dc_pitch; + BYTE color = dc_color; + + do + { + *dest = shade_pal_index_simple(color, light); + dest += pitch; + } while (--count); + } + } +}; + +class FillAddColumnRGBACommand : public DrawerCommand // averages one shaded colour with each destination pixel of a column +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + fixed_t dc_light; + int dc_color; + +public: + FillAddColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); // same packed pixel layout as *dest: red at bit 16, green at bit 8, blue at bit 0 + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillAddClampColumnRGBACommand : public DrawerCommand
+{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + fixed_t dc_light; + int dc_color; + +public: + FillAddClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_color = ::dc_color; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + FillSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - 
fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class FillRevSubClampColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + FillRevSubClampColumnRGBACommand() + { + dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = (uint32_t*)dc_dest; + int pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } +}; + +class DrawFuzzColumnRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + BYTE *dc_destorg; + int dc_pitch; + int fuzzpos; + int fuzzviewheight; + +public: + DrawFuzzColumnRGBACommand() + { + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + fuzzpos = ::fuzzpos; + fuzzviewheight = ::fuzzviewheight; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t *dest; + + // Adjust borders. Low... + if (dc_yl == 0) + dc_yl = 1; + + // .. and high. 
+		if (dc_yh > fuzzviewheight)
+			dc_yh = fuzzviewheight;
+
+		count = dc_yh - dc_yl;
+
+		// Nothing to draw when the clipped range is empty.
+		if (count < 0)
+			return;
+
+		count++;
+
+		dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg;
+
+		// Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black)
+		// I'm not sure if this is really always the case or not.
+
+		{
+			const int pitch = dc_pitch;
+			int fuzz = fuzzpos;
+
+			// Reads the neighbour selected by the next fuzz table entry,
+			// scales its color to 3/4, stores it in the current pixel and
+			// steps one row down.
+			auto fuzz_pixel = [&]()
+			{
+				uint32_t bg = dest[fuzzoffset[fuzz++]];
+
+				uint32_t red = ((bg >> 16) & 0xff) * 3 / 4;
+				uint32_t green = ((bg >> 8) & 0xff) * 3 / 4;
+				uint32_t blue = (bg & 0xff) * 3 / 4;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+			};
+
+			// The work is split in three phases so the table index only
+			// has to be wrapped between phases, never per pixel.
+
+			// Phase 1: run from the saved position to the end of the table.
+			if (fuzz)
+			{
+				int lead = MIN(FUZZTABLE - fuzz, count);
+				count -= lead;
+				do
+				{
+					fuzz_pixel();
+				} while (--lead);
+			}
+			if (fuzz == FUZZTABLE || count > 0)
+			{
+				// Phase 2: as many complete passes over the table as fit.
+				while (count >= FUZZTABLE)
+				{
+					fuzz = 0;
+					count -= FUZZTABLE;
+					for (int left = FUZZTABLE; left > 0; --left)
+						fuzz_pixel();
+				}
+
+				// Phase 3: whatever is left, starting at the table head.
+				fuzz = 0;
+				for (; count > 0; --count)
+					fuzz_pixel();
+			}
+
+			// NOTE(review): inside this class `fuzzpos` names the member
+			// copy, so this store does not update the global ::fuzzpos.
+			fuzzpos = fuzz;
+		}
+	}
+};
+
+// Draws a texture column blended over the destination:
+// out = (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+// All inputs are copied from the globals of the same name at construction.
+class DrawAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		if (dc_count <= 0)
+			return;
+
+		uint32_t *out = (uint32_t*)dc_dest;
+		const BYTE *texels = dc_source;
+		const int pitch = dc_pitch;
+		const fixed_t texstep = dc_iscale;
+		fixed_t texpos = dc_texturefrac;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Reduce the fixed point blend factors to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		for (int left = dc_count; left > 0; --left)
+		{
+			uint32_t fg = shade_pal_index(texels[texpos >> FRACBITS], light, shade_constants);
+			uint32_t bg = *out;
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (bg >> 16) & 0xff;
+			uint32_t bg_green = (bg >> 8) & 0xff;
+			uint32_t bg_blue = bg & 0xff;
+
+			uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+			uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+			uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+			*out = 0xff000000 | (red << 16) | (green << 8) | blue;
+			out += pitch;
+			texpos += texstep;
+		}
+	}
+};
+
+class DrawTranslatedColumnRGBACommand : public
DrawerCommand
+{
+	// Draws a texture column whose texels are first remapped through
+	// dc_translation and then shaded. Inputs are copied from the globals
+	// of the same name at construction.
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+
+public:
+	DrawTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t* dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			// [RH] Local copies of global vars to improve compiler optimizations
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+
+			do
+			{
+				*dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Translated variant of the additive column drawer: texels are remapped
+// through dc_translation, shaded, then blended with the destination as
+// (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+class DrawTlatedAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+	// The blend factors are captured like every other input. Without
+	// these members Execute() would read the globals at execution time
+	// instead of the values that were current when the command was built.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawTlatedAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+
+			// Reduce the fixed point blend factors to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws a column in the single shaded color dc_color. The texture is not
+// used for color: each texel is looked up in dc_colormap and the result,
+// clamped to 0..64, is the coverage of dc_color over the destination.
+class DrawShadedColumnRGBACommand : public DrawerCommand
+{
+private:
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	fixed_t dc_light;
+	const BYTE *dc_source;
+	lighttable_t *dc_colormap;
+	int dc_color;
+	int dc_pitch;
+
+public:
+	DrawShadedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_light = ::dc_light;
+		dc_source = ::dc_source;
+		dc_colormap = ::dc_colormap;
+		dc_color = ::dc_color;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac, fracstep;
+
+		count = dc_count;
+
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		// The foreground color is constant for the column; unpack it once.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		{
+			const BYTE *source = dc_source;
+			BYTE *colormap = dc_colormap;
+			int pitch = dc_pitch;
+
+			do
+			{
+				// alpha + inv_alpha is always 64, hence the / 64 below.
+				DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64);
+				DWORD inv_alpha = 64 - alpha;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64;
+				uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64;
+				uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws a shaded texture column blended with the destination as
+// (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+class DrawAddClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed point blend factors to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Same blend as DrawAddClampColumnRGBACommand, with every texel first
+// remapped through dc_translation.
+class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed point blend factors to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				
uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Subtractive column drawer with palette translation:
+// out = clamp(bg * destalpha - fg * srcalpha) per channel, where fg is the
+// shaded color of the translated texel. Inputs are copied from the globals
+// of the same name at construction.
+class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+	BYTE *dc_translation;
+
+public:
+	DrawSubClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+		dc_translation = ::dc_translation;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		if (dc_count <= 0)
+			return;
+
+		uint32_t *out = (uint32_t*)dc_dest;
+		BYTE *remap = dc_translation;
+		const BYTE *texels = dc_source;
+		const int pitch = dc_pitch;
+		const fixed_t texstep = dc_iscale;
+		fixed_t texpos = dc_texturefrac;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Blend factors reduced to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		for (int left = dc_count; left > 0; --left)
+		{
+			uint32_t fg = shade_pal_index(remap[texels[texpos >> FRACBITS]], light, shade_constants);
+			uint32_t bg = *out;
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (bg >> 16) & 0xff;
+			uint32_t bg_green = (bg >> 8) & 0xff;
+			uint32_t bg_blue = bg & 0xff;
+
+			// 0x10000 biases the difference so it stays non-negative; after
+			// the / 256 the bias is 256, which the clamp range and the
+			// final - 256 take out again.
+			uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+			*out = 0xff000000 | (red << 16) | (green << 8) | blue;
+			out += pitch;
+			texpos += texstep;
+		}
+	}
+};
+
+// Reverse-subtractive column drawer:
+// out = clamp(fg * srcalpha - bg * destalpha) per channel.
+class DrawRevSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawRevSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		if (dc_count <= 0)
+			return;
+
+		uint32_t *out = (uint32_t*)dc_dest;
+		const BYTE *texels = dc_source;
+		const int pitch = dc_pitch;
+		const fixed_t texstep = dc_iscale;
+		fixed_t texpos = dc_texturefrac;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Blend factors reduced to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		for (int left = dc_count; left > 0; --left)
+		{
+			uint32_t fg = shade_pal_index(texels[texpos >> FRACBITS], light, shade_constants);
+			uint32_t bg = *out;
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (bg >> 16) & 0xff;
+			uint32_t bg_green = (bg >> 8) & 0xff;
+			uint32_t bg_blue = bg & 0xff;
+
+			// Biased by 0x10000 before the divide, un-biased by 256 after.
+			uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+			*out = 0xff000000 | (red << 16) | (green << 8) | blue;
+			out += pitch;
+			texpos += texstep;
+		}
+	}
+};
+
+// Reverse-subtractive column drawer with palette translation: same blend
+// as DrawRevSubClampColumnRGBACommand, texels remapped via dc_translation.
+class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+	BYTE *dc_translation;
+
+public:
+	DrawRevSubClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+		dc_translation = ::dc_translation;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		if (dc_count <= 0)
+			return;
+
+		uint32_t *out = (uint32_t*)dc_dest;
+		BYTE *remap = dc_translation;
+		const BYTE *texels = dc_source;
+		const int pitch = dc_pitch;
+		const fixed_t texstep = dc_iscale;
+		fixed_t texpos = dc_texturefrac;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Blend factors reduced to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		for (int left = dc_count; left > 0; --left)
+		{
+			uint32_t fg = shade_pal_index(remap[texels[texpos >> FRACBITS]], light, shade_constants);
+			uint32_t bg = *out;
+
+			uint32_t fg_red = (fg >> 16) & 0xff;
+			uint32_t fg_green = (fg >> 8) & 0xff;
+			uint32_t fg_blue = fg & 0xff;
+
+			uint32_t bg_red = (bg >> 16) & 0xff;
+			uint32_t bg_green = (bg >> 8) & 0xff;
+			uint32_t bg_blue = bg & 0xff;
+
+			// Biased by 0x10000 before the divide, un-biased by 256 after.
+			uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+			uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+			*out = 0xff000000 | (red << 16) | (green << 8) | blue;
+			out += pitch;
+			texpos += texstep;
+		}
+	}
+};
+
+// Draws one horizontal span (ds_x1..ds_x2 on row ds_y) of a flat texture,
+// shading every texel. Two Execute() bodies exist: a scalar one when NO_SSE
+// is defined, and one that shades four pixels at a time otherwise.
+class DrawSpanRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_x1;
+	int ds_x2;
+	int ds_y;
+	int ds_xbits;
+	int ds_ybits;
+	BYTE *dc_destorg;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+
+public:
+	DrawSpanRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_destorg = ::dc_destorg;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+	}
+
+#ifdef NO_SSE
+	void Execute(DrawerThread *thread) override
+	{
+		// Texture coordinates are stepped as dsfixed_t values; the texel
+		// index is taken from their top bits.
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+
+		const BYTE *source = ds_source;
+		uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 flats are by far the most common, so their shifts and
+			// mask are spelled out as constants.
+			do
+			{
+				int spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: derive the shifts and mask from the texture's
+			// size in bits.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+
+			do
+			{
+				int spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+#else
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac = ds_xfrac;
+		dsfixed_t yfrac = ds_yfrac;
+		dsfixed_t xstep = ds_xstep;
+		dsfixed_t ystep = ds_ystep;
+		const BYTE* source = ds_source;
+		int spot;
+
+		uint32_t* dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+		int count = ds_x2 - ds_x1 + 1;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+
+			uint32_t *palette = (uint32_t*)GPalette.BaseColors;
+
+			// Groups of four pixels go through the SSE loops; `count` keeps
+			// only the 0..3 pixels left over for the scalar tail.
+			int sse_count = count / 4;
+			count -= sse_count * 4;
+
+			if (shade_constants.simple_shade)
+			{
+				SSE_SHADE_SIMPLE_INIT(light);
+
+				while (sse_count--)
+				{
+					// Current texture index in u,v.
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p0 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p1 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p2 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p3 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					// Look the four palette indices up and shade them together.
+					// _mm_set_epi32 takes its arguments highest lane first, so
+					// p0 ends up in the lowest lane, i.e. at dest[0].
+					__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
+					SSE_SHADE_SIMPLE(fg);
+					_mm_storeu_si128((__m128i*)dest, fg);
+
+					// Four pixels were written.
+					dest += 4;
+				}
+			}
+			else
+			{
+				SSE_SHADE_INIT(light, shade_constants);
+
+				while (sse_count--)
+				{
+					// Current texture index in u,v.
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p0 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p1 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p2 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p3 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					// Same as above, using the full (non-simple) shade macro.
+					__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
+					SSE_SHADE(fg, shade_constants);
+					_mm_storeu_si128((__m128i*)dest, fg);
+
+					// Four pixels were written.
+					dest += 4;
+				}
+			}
+
+			// The do/while below always runs at least once, so leave here
+			// when the span length was a multiple of four.
+			if (count == 0)
+				return;
+
+			// Scalar tail for the remaining 1..3 pixels.
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// Textures that are not 64x64 take the scalar path only.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+#endif
+};
+
+// Draws one horizontal span of a flat texture, skipping texels whose
+// palette index is 0. Inputs are copied from the globals of the same name
+// at construction.
+class DrawSpanMaskedRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Right end of the span. This member must exist: without it the
+	// `ds_x2 = ::ds_x2` below assigns the global to itself and Execute()
+	// reads the live global instead of the captured value.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+
+public:
+	DrawSpanMaskedRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				BYTE texdata;
+
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				texdata = source[spot];
+				// Palette index 0 marks a transparent texel: leave the
+				// destination pixel untouched.
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask derived from the texture's
+			// size in bits.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				BYTE texdata;
+
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				texdata = source[spot];
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws one horizontal span of a flat texture blended over the destination:
+// out = (fg * srcalpha + bg * destalpha) / 256 per channel. Every input,
+// including the span end and the two blend factors, is copied from the
+// global of the same name at construction.
+class DrawSpanTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Right end of the span. Without this member the constructor's
+	// `ds_x2 = ::ds_x2` is a self-assignment of the global.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	// Blend factors, captured so Execute() does not read the globals at
+	// execution time.
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanTranslucentRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed point blend factors to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			// General case: shifts and mask derived from the texture's
+			// size in bits.
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	int ds_y1;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+
+public:
+	
DrawSpanMaskedTranslucentRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + 
int ds_ybits; + +public: + DrawSpanAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + 
DrawSpanMaskedAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class FillSpanRGBACommand : public DrawerCommand +{ + int ds_x1; + int ds_x2; + int ds_y; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + FillSpanRGBACommand() + { + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + 
ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class Vlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int vlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Vlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + vlinebits = ::vlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Vlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Vlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = 
::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> 
bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Mvlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int mvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Mvlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + mvlinebits = ::mvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = 
(uint32_t*)dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(pix, light, shade_constants); + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Mvlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Mvlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, 
shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; 
+ BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Tmvline1AddRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = 
shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +};
+
+// Four-wide masked additive drawer. Each loop iteration handles four
+// horizontally adjacent destination pixels (dest[0..3]), one texel from
+// each of the four bufplce[] sources, then steps dest by dc_pitch.
+// Texels with palette index 0 are skipped. Visible texels are shaded with
+// their own light level and blended over the pixel they replace as
+// (fg * srcalpha + bg * destalpha) / 256, saturated at 255 per channel.
+//
+// The constructor snapshots the global drawer state; Execute() advances
+// the vplce[] members in place as it walks the textures.
+class Tmvline4AddRGBACommand : public DrawerCommand
+{
+	BYTE *dc_dest;
+	int dc_count;
+	int dc_pitch;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+	int tmvlinebits;
+	fixed_t palookuplight[4];
+	DWORD vplce[4];
+	DWORD vince[4];
+	const BYTE *bufplce[4];
+
+public:
+	Tmvline4AddRGBACommand()
+	{
+		dc_dest = ::dc_dest;
+		dc_count = ::dc_count;
+		dc_pitch = ::dc_pitch;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+		tmvlinebits = ::tmvlinebits;
+		for (int i = 0; i < 4; i++)
+		{
+			palookuplight[i] = ::palookuplight[i];
+			vplce[i] = ::vplce[i];
+			vince[i] = ::vince[i];
+			bufplce[i] = ::bufplce[i];
+		}
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		uint32_t *dest = (uint32_t*)dc_dest;
+		int count = dc_count;
+		int bits = tmvlinebits;
+
+		uint32_t light[4];
+		light[0] = calc_light_multiplier(palookuplight[0]);
+		light[1] = calc_light_multiplier(palookuplight[1]);
+		light[2] = calc_light_multiplier(palookuplight[2]);
+		light[3] = calc_light_multiplier(palookuplight[3]);
+
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		// Reduce the fixed-point blend factors to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		do
+		{
+			for (int i = 0; i < 4; ++i)
+			{
+				BYTE pix = bufplce[i][vplce[i] >> bits];
+				if (pix != 0)
+				{
+					uint32_t fg = shade_pal_index(pix, light[i], shade_constants);
+					uint32_t fg_red = (fg >> 16) & 0xff;
+					uint32_t fg_green = (fg >> 8) & 0xff;
+					uint32_t fg_blue = fg & 0xff;
+
+					// The background is the pixel this column overwrites,
+					// i.e. dest[i] -- not dest[0] for every column.
+					uint32_t bg = dest[i];
+					uint32_t bg_red = (bg >> 16) & 0xff;
+					uint32_t bg_green = (bg >> 8) & 0xff;
+					uint32_t bg_blue = bg & 0xff;
+
+					// Unsigned math: only the upper bound can be exceeded.
+					uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+					uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+					uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+					if (red > 255) red = 255;
+					if (green > 255) green = 255;
+					if (blue > 255) blue = 255;
+
+					dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue;
+				}
+				vplce[i] += vince[i];
+			}
+			dest += dc_pitch;
+		} while (--count);
+	}
+};
+
+class Tmvline1AddClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light,
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if 
(pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1SubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1SubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4SubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4SubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i 
= 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1RevSubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1RevSubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = 
dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4RevSubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4RevSubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = 
dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class DrawFogBoundaryLineRGBACommand : public DrawerCommand +{ + int _y; + int _x; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + { + _y = y; + _x = x; + _x2 = x2; + + dc_destorg = ::dc_destorg; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int y = _y; + int x = _x; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; + + do + { + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - 
constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; + } while (++x <= x2); + } +}; + +///////////////////////////////////////////////////////////////////////////// + +void R_FinishDrawerCommands() +{ + DrawerCommandQueue::Finish(); +} + +void R_DrawColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillColumnP_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddColumn_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillRevSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFuzzColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); + fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; +} + +void R_DrawAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTlatedAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawShadedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampColumnP_RGBA_C() 
+{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSpan_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD vlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void vlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD mvlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void mvlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void 
tmvline4_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index ca6862ed6..c829c2dc4 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1019,6 +1019,14 @@ void rt_initcols_pal (BYTE *buff) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } +void rt_span_coverage_pal(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; +} + // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_C (void) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index ff5c0d82f..bbf68a795 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -46,53 +46,1560 @@ #include #endif -uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -uint32_t *dc_temp_rgba; - -// Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +///////////////////////////////////////////////////////////////////////////// + +class RtCopy1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch * 2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch * 2] = source[8]; + dest[pitch * 3] = source[12]; + source += 16; + dest += pitch * 4; + } while (--count); + } +}; + +class RtMap1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + 
} + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(*source, light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +}; + +class RtMap4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, 
shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + 
SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + } +#endif +}; + +class RtTranslate1colRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int hx; + int yl; + int yh; + +public: + RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) + { + this->translation = translation; + this->hx = hx; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. 
Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } +}; + +class RtTranslate4colsRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int yl; + int yh; + +public: + RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) + { + this->translation = translation; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. 
+ for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } +}; + +class RtAdd1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + 
uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtAdd4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t 
green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, 
mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtShaded1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + BYTE *dc_destorg; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void 
Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtShaded4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + int dc_color; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_color = ::dc_color; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 
8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, 
alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } +#endif +}; + +class RtAddClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + 
+class RtAddClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + 
(uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t 
p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + 
uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], 
light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + 
+ uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = 
clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtInitColsRGBACommand : public DrawerCommand +{ + BYTE *buff; + +public: + RtInitColsRGBACommand(BYTE *buff) + { + this->buff = buff; + } + + void Execute(DrawerThread *thread) override + { + thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; + } +}; + +class DrawColumnHorizRGBACommand : public DrawerCommand +{ + int dc_count; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_x; + int dc_yl; + int dc_yh; + +public: + DrawColumnHorizRGBACommand() + { + dc_count = ::dc_count; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + uint32_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + 
+ do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + } +}; + +class FillColumnHorizRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + int dc_count; + int dc_color; + +public: + FillColumnHorizRGBACommand() + { + dc_x = ::dc_x; + dc_count = ::dc_count; + dc_color = ::dc_color; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + int color = dc_color; + uint32_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } +}; + +///////////////////////////////////////////////////////////////////////////// + // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -108,293 +1615,23 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do 
{ - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } + DrawerCommandQueue::QueueCommand(sx, yl, yh); } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } + DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } + DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
@@ -414,195 +1651,15 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx without clamping. 
-#ifndef NO_SSE -void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -620,332 +1677,27 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Shades all four spans to the screen starting at sx. 
-#ifndef NO_SSE -void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) -{ - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); -} -#endif - // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx with clamping. 
-#ifndef NO_SSE -void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -963,91 +1715,13 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. 
void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1067,91 +1741,13 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. 
void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1172,102 +1768,41 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // call this function to set up the span pointers. void rt_initcols_rgba (BYTE *buff) { - int y; - - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : (uint32_t*)buff; - for (y = 3; y >= 0; y--) + for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buff); +} + +void rt_span_coverage_rgba(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; } // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. void R_DrawColumnHorizP_RGBA_C (void) { - int count = dc_count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while 
(--count); - } + DrawerCommandQueue::QueueCommand(); } // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { - int count = dc_count; - BYTE color = dc_color; - uint32_t *dest; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); + DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_main.cpp b/src/r_main.cpp index a795f8016..348c70120 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -979,6 +979,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); + R_FinishDrawerCommands(); + viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 645741a2a..62190b606 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -165,6 +165,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_FinishDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index c132cc2fd..b3a2daefe 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -706,10 +706,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop } else { - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = span->Start; - (*tspan)[1] = span->Stop - 1; - *tspan += 2; + rt_span_coverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) From c452d0257380799e6b2d89e6177380ae2a948235 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 15:25:11 +0200 Subject: [PATCH 22/94] Added multicore rendering to true color drawers --- src/r_draw.h | 60 ++++- src/r_draw_rgba.cpp | 535 ++++++++++++++++++++++++++++++------------- src/r_drawt_rgba.cpp | 308 +++++++++++++------------ 3 files changed, 603 insertions(+), 300 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 98be57c51..bf73c9dfb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,9 @@ #include "r_defs.h" #include +#include +#include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -74,7 +77,6 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; - // [RH] Pointers to the different column and span drawers... // The span blitting interface. 
@@ -443,19 +445,58 @@ void R_SetTranslationMap(lighttable_t *translation); // Wait until all drawers finished executing void R_FinishDrawerCommands(); +class DrawerCommandQueue; + class DrawerThread { public: + std::thread thread; + + // Thread line index of this thread int core = 0; + + // Number of active threads int num_cores = 1; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + return (num_cores - (first_line - core) % num_cores) % num_cores; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } }; class DrawerCommand { +protected: + int dc_dest_y; + public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + virtual void Execute(DrawerThread *thread) = 0; }; @@ -467,8 +508,25 @@ class DrawerCommandQueue std::vector commands; + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + int finished_threads = 0; + + void StartThreads(); + void StopThreads(); + static DrawerCommandQueue *Instance(); + ~DrawerCommandQueue(); + public: // Allocate memory valid for the duration of a command execution static void* AllocMemory(size_t size); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 
9e61bb427..489716e1f 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -63,6 +63,11 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() return &queue; } +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + void* DrawerCommandQueue::AllocMemory(size_t size) { // Make sure allocations remain 16-byte aligned @@ -81,19 +86,102 @@ void DrawerCommandQueue::Finish() { auto queue = Instance(); - DrawerThread thread; + // Give worker threads something to do: - size_t size = queue->commands.size(); + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = queue->threads.size() + 1; + + size_t size = queue->active_commands.size(); for (size_t i = 0; i < size; i++) { - auto &command = queue->commands[i]; + auto &command = queue->active_commands[i]; command->Execute(&thread); } - for (auto &command : queue->commands) + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + // Clean up batch: + + for (auto &command : queue->active_commands) command->~DrawerCommand(); - queue->commands.clear(); + queue->active_commands.clear(); queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until 
we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; } ///////////////////////////////////////////////////////////////////////////// @@ -129,28 +217,28 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; // Inner loop that does the actual texture mapping, // e.g. a DDA-lile scaling. 
@@ -190,17 +278,17 @@ public: int count; uint32_t* dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); { - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; BYTE color = dc_color; do @@ -235,12 +323,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -286,12 +374,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -337,12 +425,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -388,12 +476,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -451,15 +539,13 @@ public: if (dc_yh > fuzzviewheight) dc_yh = fuzzviewheight; - count = dc_yh - dc_yl; + count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); // Zero length. - if (count < 0) + if (count <= 0) return; - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; + dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -467,7 +553,7 @@ public: { // [RH] Make local copies of global vars to try and improve // the optimizations made by the compiler. - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; int fuzz = fuzzpos; int cnt; @@ -573,18 +659,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -649,23 +735,23 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -710,22 +796,22 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -787,15 +873,15 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -805,7 +891,7 @@ public: { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -863,18 +949,18 @@ public: fixed_t frac; 
fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -941,19 +1027,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1018,18 +1104,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1096,19 +1182,19 @@ public: fixed_t frac; fixed_t fracstep; - count = 
dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1173,18 +1259,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); @@ -1250,19 +1336,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1329,6 +1415,9 @@ 
public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1391,6 +1480,9 @@ public: #else void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1572,6 +1664,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1671,6 +1766,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1789,6 +1887,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1917,6 +2018,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2035,6 +2139,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2149,6 +2256,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); @@ -2186,13 +2296,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = 
(uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2238,8 +2351,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2250,21 +2367,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { - dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, 
shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; + int pitch = dc_pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); @@ -2276,6 +2406,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2300,7 +2436,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2326,7 +2462,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2361,13 +2497,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2417,8 +2556,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2429,21 +2572,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = 
local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2456,6 +2612,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2483,7 +2645,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2512,7 +2674,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2551,13 +2713,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count 
= dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2626,8 +2791,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2641,11 +2810,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2663,9 +2841,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2702,13 +2880,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; 
- int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2777,8 +2958,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2792,11 +2977,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2814,9 +3008,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2853,13 +3047,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = 
dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2928,8 +3125,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2943,11 +3144,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2965,9 +3175,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3004,13 +3214,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - 
DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -3079,8 +3292,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3094,11 +3311,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -3116,9 +3342,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3146,6 +3372,9 @@ public: void Execute(DrawerThread *thread) override { + if 
(thread->line_skipped_by_thread(_y)) + return; + int y = _y; int x = _x; int x2 = _x2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index bbf68a795..c2caec0c2 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -78,26 +78,26 @@ public: uint32_t *source; uint32_t *dest; int count; - int pitch; + int pitch, sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, (yh - yl + 1)); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = *source; - source += 4; + source += sincr; dest += pitch; } if (count & 2) { dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; + dest[pitch] = source[sincr]; + source += sincr * 2; dest += pitch * 2; } if (!(count >>= 2)) @@ -105,10 +105,10 @@ public: do { dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch * 2] = source[8]; - dest[pitch * 3] = source[12]; - source += 16; + dest[pitch] = source[sincr]; + dest[pitch * 2] = source[sincr * 2]; + dest[pitch * 3] = source[sincr * 3]; + source += sincr * 4; dest += pitch * 4; } while (--count); } @@ -145,22 +145,23 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + 
thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = shade_pal_index(*source, light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -168,8 +169,8 @@ public: do { dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -205,25 +206,26 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { dest[0] = shade_pal_index(source[0], light, shade_constants); dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -234,11 +236,11 @@ public: dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[sincr + 1], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -249,19 +251,20 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (shade_constants.simple_shade) { @@ -278,7 +281,7 @@ public: SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -299,17 +302,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -328,7 +331,7 @@ public: SSE_SHADE(fg, shade_constants); 
_mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -349,17 +352,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -522,15 +525,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -554,7 +558,7 @@ public: *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -590,15 +594,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * 
thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -625,7 +630,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -636,15 +641,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -686,7 +692,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -722,7 +728,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -764,16 +770,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = 
shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -793,7 +800,7 @@ public: uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -832,16 +839,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -864,7 +872,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -876,16 +884,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); @@ -913,7 +922,7 @@ public: __m128i color = 
_mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -955,15 +964,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -986,7 +996,7 @@ public: uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1026,15 +1036,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1060,7 +1071,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1071,15 +1082,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 
0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1121,7 +1133,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1157,7 +1169,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1200,15 +1212,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1231,7 +1244,7 @@ public: uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1270,15 +1283,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + 
count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1305,7 +1319,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1346,15 +1360,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1377,7 +1392,7 @@ public: uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1416,15 +1431,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, 
dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1451,7 +1467,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } From c59db95cc87543e648f64a19d37f78f2a6656d4d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Jun 2016 23:12:38 +0200 Subject: [PATCH 23/94] Rewrote wallscan to fix buffer overruns and code duplication. --- src/r_segs.cpp | 910 +++++++++++++++---------------------------------- 1 file changed, 283 insertions(+), 627 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index bd2c7d22b..5aa7c29a2 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1065,53 +1065,149 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) { - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return doprevline1 (); + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + } + return uv_start + uv_step * (uint32_t)count; + } + else + { + uint32_t uv_pos = uv_start; + + int left = y2 - y1; + while (left > 0) + { + int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; + int count = MIN(left, next_uv_wrap); + + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + } + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } } -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) { - int x, fracbits; - int y1ve[4], y2ve[4], u4, d4, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - // This function also gets used to draw skies. Unlike BUILD, skies are - // drawn by visplane instead of by bunch, so these checks are invalid. - //if ((uwal[x1] > viewheight) && (uwal[x2] > viewheight)) return; - //if ((dwal[x1] < 0) && (dwal[x2] < 0)) return; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + uv_pos[i] += uv_step[i] * (y2 - y1); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setupvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; + int left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + int count = left; + for (int i = 0; i < 4; i++) + { + int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + count = MIN(next_uv_wrap, count); + } - x = x1; - //while ((umost[x] > dmost[x]) && (x < x2)) x++; + // Draw until that column wraps + if (count > 0) + { + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + } + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - std::floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1131,139 +1227,190 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l else R_SetColorMapLight(basecolormap, 0, 0); - for(; (x < x2) && (x & 3); ++x) + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - assert (y1ve[0] < viewheight); - assert (y2ve[0] <= viewheight); + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; if (!fixed) - { // calculate lighting R_SetColorMapLight(basecolormap, light, wallshade); - } - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = 
xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } - for(; x < x2-3; x += 4) + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) { - bad = 0; - for (z = 3; z>= 0; --z) - { - y1ve[z] = uwal[x+z];//max(uwal[x+z],umost[x+z]); - y2ve[z] = dwal[x+z];//min(dwal[x+z],dmost[x+z])-1; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[x + z] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) { - light += rw_lightstep * 4; + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], 
uv_max, source[i], draw1column); + } continue; } + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active if (!fixed) { - for (z = 0; z < 4; ++z) + for (int i = 0; i < 4; i++) { - light += rw_lightstep; if (r_swtruecolor) { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = LIGHTSCALE(lights[i], wallshade); } else { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; } } } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - bad >>= 1; - } - continue; - } + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; - dovline4(); - } - - BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; - for (z = 0; z < 4; ++z) - { - if 
(y2ve[z] > d4) - { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); } } - for(;x> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } -//unclock (WallScanCycles); - NetUpdate (); } +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1331,507 +1478,16 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. -// -//============================================================================= - +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. 
void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); } +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return domvline1 (); -} - -void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - 
- if (!rw_pic->bMasked) - { // Textures that aren't masked can use the faster wallscan. - wallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32- rw_pic->HeightBits; - setupmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + 
iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - domvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); -} - -inline void preptmvline1 (fixed_t vince, BYTE 
*colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; -} - -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - fixed_t (*tmvline1)(); - void (*tmvline4)(); - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - - if (!R_GetTransMaskDrawers (&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 
2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setuptmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - fixed_t centeryfrac = FLOAT2FIXED(CenterY); - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + vince[z] * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - 
palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - tmvline1(); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - vplce[z] = tmvline1(); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - tmvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - tmvline1(); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); } // From 05b6fe6174147ceef8c64ad83a1eab2736080c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 13:50:34 +0200 Subject: [PATCH 24/94] Added true color texture support for walls and 
floors --- src/r_draw.cpp | 7 +- src/r_draw.h | 22 ++- src/r_draw_rgba.cpp | 303 +++++++++++++++++++----------------- src/r_main.h | 46 ++++++ src/r_plane.cpp | 104 +++++++++---- src/r_segs.cpp | 4 + src/textures/pngtexture.cpp | 162 ++++++++++++++++++- src/textures/texture.cpp | 28 ++++ src/textures/textures.h | 12 +- 9 files changed, 508 insertions(+), 180 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 984a74f3f..2710b9992 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2265,9 +2265,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) { col = width + (col % width); } - return tex->GetColumn (col, NULL); -} + if (r_swtruecolor) + return (const BYTE *)tex->GetColumnBgra(col, NULL); + else + return tex->GetColumn(col, NULL); +} // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () diff --git a/src/r_draw.h b/src/r_draw.h index bf73c9dfb..3f97a7a65 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -520,6 +520,9 @@ class DrawerCommandQueue std::condition_variable end_condition; int finished_threads = 0; + bool no_threading = false; + DrawerThread single_core_thread; + void StartThreads(); void StopThreads(); @@ -535,11 +538,20 @@ public: template static void QueueCommand(Types &&... 
args) { - void *ptr = AllocMemory(sizeof(T)); - T *command = new (ptr)T(std::forward(args)...); - if (!command) - return; - Instance()->commands.push_back(command); + auto queue = Instance(); + if (queue->no_threading) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } } // Wait until all worker threads finished executing commands diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 489716e1f..528c3c986 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -85,6 +85,8 @@ void* DrawerCommandQueue::AllocMemory(size_t size) void DrawerCommandQueue::Finish() { auto queue = Instance(); + if (queue->commands.empty()) + return; // Give worker threads something to do: @@ -190,8 +192,8 @@ class DrawColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_texturefrac; - fixed_t dc_iscale; + DWORD dc_texturefrac; + DWORD dc_iscale; fixed_t dc_light; const BYTE *dc_source; int dc_pitch; @@ -628,8 +630,8 @@ class DrawAddColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -708,8 +710,8 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -769,8 +771,8 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -845,8 +847,8 @@ 
class DrawShadedColumnRGBACommand : public DrawerCommand private: int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; fixed_t dc_light; const BYTE *dc_source; lighttable_t *dc_colormap; @@ -918,8 +920,8 @@ class DrawAddClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -994,8 +996,8 @@ class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -1073,8 +1075,8 @@ class DrawSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1149,8 +1151,8 @@ class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1228,8 +1230,8 @@ class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1303,8 +1305,8 @@ class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1380,7 +1382,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_xfrac; fixed_t 
ds_yfrac; fixed_t ds_xstep; @@ -1397,7 +1399,7 @@ class DrawSpanRGBACommand : public DrawerCommand public: DrawSpanRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_xfrac = ::ds_xfrac; ds_yfrac = ::ds_yfrac; ds_xstep = ::ds_xstep; @@ -1423,7 +1425,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1450,7 +1452,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1469,7 +1471,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1488,7 +1490,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1598,7 +1600,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1617,7 +1619,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -1630,7 +1632,7 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1647,7 +1649,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand public: DrawSpanMaskedRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1672,7 +1674,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1694,13 +1696,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1714,13 +1716,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1732,7 +1734,7 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1749,7 +1751,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand public: DrawSpanTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t *)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1774,7 +1776,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* 
dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1801,7 +1803,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1829,7 +1831,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1853,7 +1855,7 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1870,7 +1872,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1895,7 +1897,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1920,13 +1922,13 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1953,13 +1955,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1984,7 +1986,7 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2001,7 +2003,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand public: DrawSpanAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2026,7 +2028,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2053,7 +2055,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2081,7 +2083,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, 
shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2105,7 +2107,7 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2122,7 +2124,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2147,7 +2149,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2172,13 +2174,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2205,13 +2207,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2270,8 +2272,8 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const 
BYTE *dc_source; BYTE *dc_dest; @@ -2302,7 +2304,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2312,7 +2314,7 @@ public: do { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + *dest = shade_bgra(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -2329,7 +2331,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Vlinec4RGBACommand() @@ -2344,7 +2346,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2378,10 +2380,10 @@ public: do { - dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], 
light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2403,7 +2405,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2423,17 +2424,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2449,17 +2450,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2471,8 +2472,8 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2503,7 +2504,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2513,10 +2514,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(pix, light, shade_constants); + *dest = shade_bgra(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -2534,7 +2535,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Mvlinec4RGBACommand() @@ -2549,7 +2550,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2583,11 +2584,11 @@ public: do { - BYTE pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = 
place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2609,7 +2610,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2629,10 +2629,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2642,7 +2642,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], 
palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2658,10 +2658,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2671,7 +2671,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2683,8 +2683,8 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2719,7 +2719,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2732,10 +2732,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = 
shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2768,7 +2768,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddRGBACommand() @@ -2785,7 +2785,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2823,10 +2823,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2850,8 +2850,8 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2886,7 +2886,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2899,10 +2899,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2935,7 +2935,7 @@ class 
Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -2952,7 +2952,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2990,10 +2990,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3017,8 +3017,8 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3053,7 +3053,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3066,10 +3066,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3102,7 +3102,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: 
Tmvline4SubClampRGBACommand() @@ -3119,7 +3119,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3157,10 +3157,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3184,8 +3184,8 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3220,7 +3220,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3233,10 +3233,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3269,7 +3269,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3286,7 +3286,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + 
bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3324,10 +3324,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3549,8 +3549,17 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } +extern FTexture *rw_pic; // For the asserts below + DWORD vlinec1_RGBA() { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + DWORD height = rw_pic->GetHeight(); + assert((frac >> vlinebits) < height); + frac += dc_count * fracstep; + assert((frac >> vlinebits) <= height); + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -3558,6 +3567,8 @@ DWORD vlinec1_RGBA() void vlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_RGBA() @@ -3569,6 +3580,8 @@ DWORD mvlinec1_RGBA() void mvlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_RGBA() @@ -3580,6 +3593,8 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_RGBA() @@ -3591,6 +3606,8 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_RGBA() @@ -3602,6 +3619,8 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_RGBA() @@ -3613,6 +3632,8 @@ fixed_t 
tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) diff --git a/src/r_main.h b/src/r_main.h index 5d4ff1174..6d0e2a21f 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -136,6 +136,19 @@ FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { @@ -171,6 +184,39 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * 
light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 26d579d6d..05fce79a6 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -973,15 +973,22 @@ extern FTexture *rw_pic; // Allow for layer skies up to 512 pixels tall. This is overkill, // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. +// Need two versions - one for true color and one for palette static BYTE skybuf[4][512]; +static uint32_t skybuf_bgra[4][512]; static DWORD lastskycol[4]; +static DWORD lastskycol_bgra[4]; static int skycolplace; +static int skycolplace_bgra; // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + if (!r_swtruecolor) + return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + else + return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); } // Get a column of sky when there are two overlapping sky textures @@ -996,38 +1003,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) DWORD skycol = (angle1 << 16) | angle2; int i; - for (i = 0; i < 4; ++i) + if (!r_swtruecolor) { - if (lastskycol[i] == skycol) + for (i = 0; i < 4; ++i) { - return skybuf[i]; + if (lastskycol[i] == skycol) + { + return skybuf[i]; + } } + + lastskycol[skycolplace] = skycol; + BYTE *composite = skybuf[skycolplace]; + skycolplace = (skycolplace + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const BYTE *front = fronttex->GetColumn(angle1, NULL); + const BYTE *back = backskytex->GetColumn(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return composite; } - - lastskycol[skycolplace] = skycol; - BYTE *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. 
- const BYTE *front = fronttex->GetColumn (angle1, NULL); - const BYTE *back = backskytex->GetColumn (angle2, NULL); - - int count = MIN (512, MIN (backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do + else { - if (front[i]) + return R_GetOneSkyColumn(fronttex, x); + for (i = 0; i < 4; ++i) { - composite[i] = front[i]; + if (lastskycol_bgra[i] == skycol) + { + return (BYTE*)(skybuf_bgra[i]); + } } - else + + lastskycol_bgra[skycolplace_bgra] = skycol; + uint32_t *composite = skybuf_bgra[skycolplace_bgra]; + skycolplace_bgra = (skycolplace_bgra + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); + const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return (BYTE*)composite; + } } static void R_DrawSky (visplane_t *pl) @@ -1062,6 +1108,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } rw_pic = frontskytex; @@ -1075,6 +1122,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, frontyScale, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1112,6 +1160,7 @@ static void R_DrawSkyStriped (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1230,7 +1279,10 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - ds_source = tex->GetPixels (); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5aa7c29a2..87ce48ec4 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1092,6 +1092,8 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; int count = MIN(left, next_uv_wrap); + if (count <= 0) + break; // This should never happen, but it does.. if (count > 0) { @@ -1146,6 +1148,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; count = MIN(next_uv_wrap, count); } + if (count <= 0) + break; // This should never happen, but it does.. 
// Draw until that column wraps if (count > 0) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index e47fa62c0..95f7aca75 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,6 +41,7 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" +#include //========================================================================== // @@ -56,6 +57,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -65,6 +67,7 @@ protected: FString SourceFile; BYTE *Pixels; + std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -73,11 +76,13 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; + std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -266,6 +271,12 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } + for (i = 0; i < PaletteSize; i++) + { + PngPalette.push_back(p.pngpal[i][0]); + PngPalette.push_back(p.pngpal[i][1]); + PngPalette.push_back(p.pngpal[i][2]); + } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); @@ -369,11 +380,9 @@ FPNGTexture::~FPNGTexture () void FPNGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -446,6 +455,16 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } +const uint32_t *FPNGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + + 
//========================================================================== // // @@ -602,6 +621,139 @@ void FPNGTexture::MakeTexture () delete lump; } +void FPNGTexture::MakeTextureBgra () +{ + FileReader *lump; + + if (SourceLump >= 0) + { + lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); + } + else + { + lump = new FileReader(SourceFile.GetChars()); + } + + PixelsBgra.resize(Width * Height, 0xffff0000); + if (StartOfIDAT != 0) + { + DWORD len, id; + lump->Seek (StartOfIDAT, SEEK_SET); + lump->Read(&len, 4); + lump->Read(&id, 4); + + if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ + { + std::vector src(Width*Height); + M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + + if (!PngPalette.empty()) + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; + uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; + uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; + PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; + } + } + } + else + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t gray = src[x + y * Width]; + PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; + } + } + } + } + else /* RGB and/or Alpha present */ + { + int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; + BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; + BYTE *in; + uint32_t *out; + int x, y, pitch, backstep; + + M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + in = tempix; + out = PixelsBgra.data(); + + // Convert from source format to paletted, column-major. + // Formats with alpha maps are reduced to only 1 bit of alpha. 
+ switch (ColorType) + { + case 2: // RGB + pitch = Width * 3; + backstep = Height * pitch - 3; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + if (!HaveTrans) + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + else + { + if (in[0] == NonPaletteTrans[0] && + in[1] == NonPaletteTrans[1] && + in[2] == NonPaletteTrans[2]) + { + *out++ = 0; + } + else + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + } + in += pitch; + } + in -= backstep; + } + break; + + case 4: // Grayscale + Alpha + pitch = Width * 2; + backstep = Height * pitch - 2; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + uint32_t alpha = in[1]; + uint32_t gray = in[0]; + *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; + in += pitch; + } + in -= backstep; + } + break; + + case 6: // RGB + Alpha + pitch = Width * 4; + backstep = Height * pitch - 4; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + in += pitch; + } + in -= backstep; + } + break; + } + delete[] tempix; + } + } + delete lump; +} + //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7b90c295f..1869491b1 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum); @@ -175,6 +176,33 @@ FTexture::~FTexture () KillNative(); } +const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) +{ + const uint32_t *pixels = GetPixelsBgra(); + + column %= Width; + if 
(column < 0) + column += Width; + + if (spans_out != nullptr) + GetColumn(column, spans_out); + return pixels + column * Height; +} + +const uint32_t *FTexture::GetPixelsBgra() +{ + if (BgraPixels.empty()) + { + const BYTE *indices = GetPixels(); + BgraPixels.resize(Width * Height); + for (int i = 0; i < Width * Height; i++) + { + BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + } + } + return BgraPixels.data(); +} + bool FTexture::CheckModified () { return false; diff --git a/src/textures/textures.h b/src/textures/textures.h index 14667093c..0d066eff5 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -3,6 +3,7 @@ #include "doomtype.h" #include "vectors.h" +#include class FBitmap; struct FRemapTable; @@ -175,9 +176,15 @@ public: // Returns a single column of the texture virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0; + // Returns a single column of the texture, in BGRA8 format + virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out); + // Returns the whole texture, stored in column-major order virtual const BYTE *GetPixels () = 0; - + + // Returns the whole texture, stored in column-major order, in BGRA8 format + virtual const uint32_t *GetPixelsBgra(); + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -262,6 +269,9 @@ protected: Rotations = other->Rotations; } +private: + std::vector BgraPixels; + public: static void FlipSquareBlock (BYTE *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); From 24f846f702bb6f8fb57a4a6370f33267442e385c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 14:25:56 +0200 Subject: [PATCH 25/94] Bug fixes --- src/r_draw_rgba.cpp | 6 ++---- src/r_main.h | 21 +++++++++++++++++++++ 2 files 
changed, 23 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 528c3c986..fa632cb5d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1511,8 +1511,6 @@ public: { // 64x64 is the most common case by far, so special case it. - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - int sse_count = count / 4; count -= sse_count * 4; @@ -1545,7 +1543,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); @@ -1582,7 +1580,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); diff --git a/src/r_main.h b/src/r_main.h index 6d0e2a21f..d71d44fe1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -217,6 +217,27 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define 
SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ From 6c70eaea2f46cdd2625ad0b51bbcc194ac3200e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:56:50 +0200 Subject: [PATCH 26/94] Add jpeg bgra support --- src/textures/jpegtexture.cpp | 116 +++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 5 deletions(-) diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 225396598..a37eff6c3 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,6 +187,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -195,9 +196,11 @@ public: protected: BYTE *Pixels; + std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -295,11 +298,9 @@ FJPEGTexture::~FJPEGTexture () void FJPEGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -358,6 +359,15 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } +const uint32_t *FJPEGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + //========================================================================== // // @@ -457,6 +467,102 @@ void FJPEGTexture::MakeTexture () } } +void FJPEGTexture::MakeTextureBgra() +{ + FWadLump lump = Wads.OpenLumpNum(SourceLump); + JSAMPLE *buff = NULL; + + jpeg_decompress_struct cinfo; + jpeg_error_mgr jerr; + + PixelsBgra.resize(Width * Height, 0xffba0000); + + cinfo.err = jpeg_std_error(&jerr); + cinfo.err->output_message = 
JPEG_OutputMessage; + cinfo.err->error_exit = JPEG_ErrorExit; + jpeg_create_decompress(&cinfo); + try + { + FLumpSourceMgr sourcemgr(&lump, &cinfo); + jpeg_read_header(&cinfo, TRUE); + if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || + (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || + (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) + { + Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); + throw - 1; + } + + jpeg_start_decompress(&cinfo); + + int y = 0; + buff = new BYTE[cinfo.output_width * cinfo.output_components]; + + while (cinfo.output_scanline < cinfo.output_height) + { + int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); + BYTE *in = buff; + uint32_t *out = PixelsBgra.data() + y; + switch (cinfo.out_color_space) + { + case JCS_RGB: + for (int x = Width; x > 0; --x) + { + uint32_t r = in[0]; + uint32_t g = in[1]; + uint32_t b = in[2]; + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 3; + } + break; + + case JCS_GRAYSCALE: + for (int x = Width; x > 0; --x) + { + uint32_t gray = in[0]; + *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; + out += Height; + in += 1; + } + break; + + case JCS_CMYK: + // What are you doing using a CMYK image? :) + for (int x = Width; x > 0; --x) + { + // To be precise, these calculations should use 255, but + // 256 is much faster and virtually indistinguishable. + uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); + uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); + uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 4; + } + break; + + default: + // The other colorspaces were considered above and discarded, + // but GCC will complain without a default for them here. 
+ break; + } + y++; + } + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + } + catch (int) + { + Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); + jpeg_destroy_decompress(&cinfo); + } + if (buff != NULL) + { + delete[] buff; + } +} + //=========================================================================== // From 103a6baac54d7d874021ad94f40098cc66d787dc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:57:31 +0200 Subject: [PATCH 27/94] Support for drawing in multiple passes --- src/r_draw.h | 17 +++++-- src/r_draw_rgba.cpp | 105 +++++++++++++++++++++++++++++++++++++++---- src/r_drawt_rgba.cpp | 62 +++++++++++++------------ 3 files changed, 141 insertions(+), 43 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 3f97a7a65..409b7c01b 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -458,25 +458,34 @@ public: // Number of active threads int num_cores = 1; + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = 300; + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { - return line % num_cores != core; + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - return (num_cores - (first_line - core) % num_cores) % num_cores; + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { - return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - 
skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); } // Calculate the dest address for the first line to be rendered by this thread @@ -522,6 +531,8 @@ class DrawerCommandQueue bool no_threading = false; DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; void StartThreads(); void StopThreads(); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fa632cb5d..b81ee4cca 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -104,11 +104,19 @@ void DrawerCommandQueue::Finish() thread.core = 0; thread.num_cores = queue->threads.size() + 1; - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(&thread); + thread.pass_start_y = pass * queue->rows_in_pass; + thread.pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(&thread); + } } // Wait for everyone to finish: @@ -156,11 +164,19 @@ void DrawerCommandQueue::StartThreads() start_lock.unlock(); // Do the work: - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(thread); + thread->pass_start_y = pass * queue->rows_in_pass; + thread->pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } } // Notify main thread that we finished: @@ -1611,6 +1627,79 @@ public: BYTE 
xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index c2caec0c2..28c86d3f5 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1528,40 +1528,38 @@ public: fracstep = dc_iscale; frac = dc_texturefrac; - { - const BYTE *source = dc_source; + const BYTE *source = dc_source; - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = 
source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); } }; From 27156eb60ad4d12e32ea24cf90437483c2373818 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 16:32:47 +0200 Subject: [PATCH 28/94] Linux compile fixes and missing variable declarations --- src/r_draw.h | 5 +++-- src/r_draw_rgba.cpp | 5 +++++ src/r_segs.cpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 409b7c01b..37a0e6778 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -28,6 +28,7 @@ #include #include #include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -460,7 +461,7 @@ public: // Range of rows processed this pass int pass_start_y = 0; - int pass_end_y = 300; + int pass_end_y = MAXHEIGHT; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; @@ -527,7 +528,7 @@ class DrawerCommandQueue std::mutex end_mutex; std::condition_variable end_condition; - int finished_threads = 0; + size_t finished_threads = 0; bool no_threading = false; DrawerThread single_core_thread; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b81ee4cca..e2dbd443a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1726,6 +1726,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1828,6 +1829,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1949,6 +1951,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int 
ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2080,6 +2083,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2201,6 +2205,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 87ce48ec4..c1d1ad744 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1184,7 +1184,7 @@ void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_heig // Find start uv in [0-uv_height[ range. // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - std::floor(v); + v = v - floor(v); v *= uv_height; v *= (1 << fracbits); From ffcfe0b54f19420b51284abcbfbec09f3aee9074 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 17:08:45 +0200 Subject: [PATCH 29/94] Fix warning --- src/textures/texture.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 1869491b1..d50081062 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -181,8 +181,6 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *pixels = GetPixelsBgra(); column %= Width; - if (column < 0) - column += Width; if (spans_out != nullptr) GetColumn(column, spans_out); From 07571da98ccad2fd2c360c4b73ba989a902f184b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 18:43:49 +0200 Subject: [PATCH 30/94] Improved how threaded rendering is handled --- src/r_draw.h | 16 ++++++++++++---- src/r_draw_rgba.cpp | 24 ++++++++++++++++++++++-- src/r_main.cpp | 4 +++- src/r_swrenderer.cpp | 3 ++- 4 files changed, 39 insertions(+), 8 deletions(-) diff 
--git a/src/r_draw.h b/src/r_draw.h index 37a0e6778..d192dc5e4 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -443,8 +443,11 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + // Wait until all drawers finished executing -void R_FinishDrawerCommands(); +void R_EndDrawerCommands(); class DrawerCommandQueue; @@ -530,13 +533,14 @@ class DrawerCommandQueue std::condition_variable end_condition; size_t finished_threads = 0; - bool no_threading = false; + int threaded_render = 0; DrawerThread single_core_thread; int num_passes = 2; int rows_in_pass = 540; void StartThreads(); void StopThreads(); + void Finish(); static DrawerCommandQueue *Instance(); @@ -551,7 +555,7 @@ public: static void QueueCommand(Types &&... args) { auto queue = Instance(); - if (queue->no_threading) + if (queue->threaded_render == 0) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); @@ -565,9 +569,13 @@ public: queue->commands.push_back(command); } } + + // Redirects all drawing commands to worker threads until Finish is called + // Begin/End blocks can be nested. 
+ static void Begin(); // Wait until all worker threads finished executing commands - static void Finish(); + static void End(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e2dbd443a..23ab106a6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -82,6 +82,21 @@ void* DrawerCommandQueue::AllocMemory(size_t size) return data; } +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); @@ -3515,9 +3530,14 @@ public: ///////////////////////////////////////////////////////////////////////////// -void R_FinishDrawerCommands() +void R_BeginDrawerCommands() { - DrawerCommandQueue::Finish(); + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); } void R_DrawColumnP_RGBA_C() diff --git a/src/r_main.cpp b/src/r_main.cpp index 348c70120..c1b78303b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -960,6 +960,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } + + R_BeginDrawerCommands(); viewwidth = width; RenderTarget = canvas; @@ -979,7 +981,7 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 62190b606..11f879c38 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -162,10 +162,11 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); } //========================================================================== From d5331e60951bbd4509ce1e2f5be13239c58a04d6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 22:22:40 +0200 Subject: [PATCH 31/94] Wallscan fix --- src/r_draw_rgba.cpp | 8 +++---- src/r_segs.cpp | 53 ++++++++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 23ab106a6..e3a64dd7a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3661,16 +3661,16 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } -extern FTexture *rw_pic; // For the asserts below +//extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_RGBA() { - DWORD fracstep = dc_iscale; + /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; DWORD height = rw_pic->GetHeight(); assert((frac >> vlinebits) < height); - frac += dc_count * fracstep; - assert((frac >> vlinebits) <= height); + frac += (dc_count-1) * fracstep; + assert((frac >> vlinebits) <= height);*/ DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index c1d1ad744..451ddf986 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1087,23 +1087,21 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { uint32_t uv_pos = uv_start; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { - int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; - int count = MIN(left, next_uv_wrap); - if (count <= 0) - break; // This should never happen, but it does.. 
+ uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - } + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); left -= count; uv_pos += uv_step * count; @@ -1138,30 +1136,28 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste for (int i = 0; i < 4; i++) bufplce[i] = source[i]; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { // Find which column wraps first - int count = left; + uint32_t count = left; for (int i = 0; i < 4; i++) { - int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; count = MIN(next_uv_wrap, count); } - if (count <= 0) - break; // This should never happen, but it does.. 
// Draw until that column wraps - if (count > 0) + for (int i = 0; i < 4; i++) { - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; } + dc_count = count; + draw4columns(); // Wrap the uv position for (int i = 0; i < 4; i++) @@ -1299,6 +1295,9 @@ void wallscan_any( { for (int i = 0; i < 4; i++) { + if (y2[i] <= y1[i]) + continue; + if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); From a6d696bbfd45c30bd7162ab948cdf4ba36fa170e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 00:50:36 +0200 Subject: [PATCH 32/94] Undo removal of wallscan_np2 and wallscan_np2_ds --- src/r_segs.cpp | 131 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 16 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 451ddf986..ad242b2f9 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1072,16 +1072,17 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv if (uv_max == 0) // power of two { int count = y2 - y1; - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - } - return uv_start + uv_step * (uint32_t)count; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); } else { @@ -1119,15 +1120,19 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed { + int count = y2 - y1; for (int i = 0; i < 4; i++) { bufplce[i] = source[i]; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; - uv_pos[i] += uv_step[i] * (y2 - y1); + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; + dc_count = count; draw4columns(); } else @@ -1481,16 +1486,110 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + if (!r_np2) + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + } + else + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } + } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } } // From 5ae8e9e8c2a68fb55aab598ba46ad86762fc8806 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 16:17:30 +0200 Subject: [PATCH 33/94] Fix missing colormap lookup --- src/r_draw.cpp | 69 +++++++++++++-------------- src/r_draw_rgba.cpp | 28 ++++++----- src/r_drawt_rgba.cpp | 108 +++++++++++++++++++++++++------------------ 3 files changed, 106 insertions(+), 99 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2710b9992..70b3893f4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2793,24 +2793,19 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) void R_SetTranslationMap(lighttable_t *translation) { dc_fcolormap = nullptr; - dc_shade_constants.light_red = 256; - dc_shade_constants.light_green = 
256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; + dc_colormap = translation; if (r_swtruecolor) { - dc_colormap = translation; - dc_light = 0; - } - else - { - dc_colormap = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; dc_light = 0; } } @@ -2818,49 +2813,47 @@ void R_SetTranslationMap(lighttable_t *translation) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { dc_fcolormap = base_colormap; - dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; - dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = dc_fcolormap->Fade.r; - dc_shade_constants.fade_green = dc_fcolormap->Fade.g; - dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; - dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + 
dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); dc_colormap = base_colormap->Maps; dc_light = LIGHTSCALE(light, shade); } else { dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - dc_light = 0; } } void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) { ds_fcolormap = base_colormap; - ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; - ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = ds_fcolormap->Fade.r; - ds_shade_constants.fade_green = ds_fcolormap->Fade.g; - ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; - ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = 
ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - ds_light = 0; } } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e3a64dd7a..979dc0743 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -229,6 +229,7 @@ class DrawColumnRGBACommand : public DrawerCommand const BYTE *dc_source; int dc_pitch; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: DrawColumnRGBACommand() @@ -241,6 +242,7 @@ public: dc_source = ::dc_source; dc_pitch = ::dc_pitch; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -267,24 +269,20 @@ public: fracstep = dc_iscale * thread->num_cores; frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch * thread->num_cores; + BYTE *colormap = dc_colormap; + + do { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + dest += pitch; + frac += fracstep; - dest += pitch; - frac += fracstep; - - } while (--count); - } + } while (--count); } }; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 28c86d3f5..5f0fc4156 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -90,13 +90,13 @@ public: sincr = thread->num_cores * 4; if (count & 1) { - *dest = *source; + *dest = GPalette.BaseColors[*source]; source += sincr; dest += pitch; } if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[sincr]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; source += sincr * 2; dest += pitch * 2; } @@ -104,10 +104,10 @@ public: return; do { - dest[0] = source[0]; - dest[pitch] = source[sincr]; - dest[pitch * 2] = source[sincr * 2]; - dest[pitch * 3] = source[sincr * 3]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; + dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; + dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; source += sincr * 4; dest += pitch * 4; } while (--count); @@ -124,6 +124,7 @@ class RtMap1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,6 +138,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -158,9 +160,11 @@ public: source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); + *dest = shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } @@ 
-168,8 +172,8 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -185,6 +189,7 @@ class RtMap4colsRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -197,6 +202,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -219,12 +225,14 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; } @@ -232,14 +240,14 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[sincr 
+ 1], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -265,16 +273,18 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (shade_constants.simple_shade) { SSE_SHADE_SIMPLE_INIT(light); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -290,10 +300,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ -302,10 +312,10 @@ public: // 
shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ -321,10 +331,10 @@ public: SSE_SHADE_INIT(light, shade_constants); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -340,10 +350,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -352,10 +362,10 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -1800,6 +1810,9 @@ void rt_span_coverage_rgba(int x, int start, int stop) // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; @@ -1812,6 +1825,9 @@ void R_DrawColumnHorizP_RGBA_C (void) // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; From 40b76dc9b0cd8d59c0f2c597cc4a690cb78ab89e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 18:41:56 +0200 Subject: [PATCH 34/94] Apply gamma when using true color output on Linux and Mac --- src/posix/cocoa/i_video.mm | 5 +-- src/posix/sdl/sdlvideo.cpp | 12 +------ src/v_video.cpp | 67 ++++++++++++++++++++++++++++++++++++-- src/v_video.h | 1 + 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 425fe5887..ddfccaa57 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -869,10 +869,7 @@ void CocoaFrameBuffer::Update() if (IsBgra()) { - for (int y = 0; y < Height; y++) - { - memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); - } + CopyWithGammaBgra(m_pixelBuffer, Width * BYTES_PER_PIXEL, m_gammaTable[0], m_gammaTable[1], m_gammaTable[2], m_flashColor, m_flashAmount); } else { diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 26121aa71..56b883978 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -497,17 +497,7 @@ void SDLFB::Update () if (Bgra) { - if (pitch == Pitch * 4) - { - memcpy(pixels, MemBuffer, Width*Height*4); - } - else - { - for (int y = 0; y < Height; ++y) - { - memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); - } - } + CopyWithGammaBgra(pixels, pitch, GammaTable[0], GammaTable[1], GammaTable[2], Flash, FlashAmount); } else if (NotPaletted) { diff --git a/src/v_video.cpp b/src/v_video.cpp index bc99edbf1..e58638121 100644 
--- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -742,13 +742,12 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // I found this formula on the web at // , // but that page no longer exits. - double invgamma = 1.f / gamma; int i; for (i = 0; i < 256; i++) { - gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma)); + gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma) + 0.5); } } @@ -876,6 +875,70 @@ DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) Accel2D = false; } +//========================================================================== +// +// DFrameBuffer :: CopyWithGammaBgra +// +// Copies data to destination buffer while performing gamma and flash. +// This is only needed if a target cannot do this with shaders. +// +//========================================================================== + +void DFrameBuffer::CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount) +{ + const BYTE *gammatables[3] = { gammared, gammagreen, gammablue }; + + if (flash_amount > 0) + { + uint16_t inv_flash_amount = 256 - flash_amount; + uint16_t flash_red = flash.r * flash_amount; + uint16_t flash_green = flash.g * flash_amount; + uint16_t flash_blue = flash.b * flash_amount; + + for (int y = 0; y < Height; y++) + { + BYTE *dest = (BYTE*)output + y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + uint16_t fg_red = src[2]; + uint16_t fg_green = src[1]; + uint16_t fg_blue = src[0]; + uint16_t red = (fg_red * inv_flash_amount + flash_red) >> 8; + uint16_t green = (fg_green * inv_flash_amount + flash_green) >> 8; + uint16_t blue = (fg_blue * inv_flash_amount + flash_blue) >> 8; + + dest[0] = gammatables[2][blue]; + dest[1] = gammatables[1][green]; + dest[2] = gammatables[0][red]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } + else + { + for (int y = 0; y < Height; y++) + { + BYTE *dest = (BYTE*)output 
+ y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + dest[0] = gammatables[2][src[0]]; + dest[1] = gammatables[1][src[1]]; + dest[2] = gammatables[0][src[2]]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } +} + + //========================================================================== // // DFrameBuffer :: DrawRateStuff diff --git a/src/v_video.h b/src/v_video.h index 120beff9a..19213bd26 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -420,6 +420,7 @@ public: protected: void DrawRateStuff (); void CopyFromBuff (BYTE *src, int srcPitch, int width, int height, BYTE *dest); + void CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount); DFrameBuffer () {} From 42efc7334e5e5dc0419b020b4db36777bc647be4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 00:50:43 +0200 Subject: [PATCH 35/94] Fix missing particles in true color mode --- src/r_draw.h | 7 +++++-- src/r_draw_rgba.cpp | 5 +++++ src/r_things.cpp | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index d192dc5e4..55ad8a0ca 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -570,12 +570,15 @@ public: } } - // Redirects all drawing commands to worker threads until Finish is called + // Redirects all drawing commands to worker threads until End is called // Begin/End blocks can be nested. 
static void Begin(); - // Wait until all worker threads finished executing commands + // End redirection and wait until all worker threads finished executing static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 979dc0743..af8487964 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -97,6 +97,11 @@ void DrawerCommandQueue::End() queue->threaded_render--; } +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); diff --git a/src/r_things.cpp b/src/r_things.cpp index 933d50e46..0c5e17b7c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2686,6 +2686,8 @@ void R_DrawParticle_RGBA(vissprite_t *vis) int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); + + DrawerCommandQueue::WaitForWorkers(); uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; From 350857a9f6b2e6aa0a0da9f6eb27c2b05066c80e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 19:19:44 +0200 Subject: [PATCH 36/94] Fixed fuzz effect when using multiple cores --- src/r_draw_rgba.cpp | 89 ++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 66 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index af8487964..a5d924dfa 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -583,79 +583,36 @@ public: dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. 
+ int pitch = dc_pitch * thread->num_cores; + int fuzzstep = thread->num_cores; + int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + while (count > 0) { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch * thread->num_cores; - int fuzz = fuzzpos; - int cnt; + int available = (FUZZTABLE - fuzz); + int next_wrap = available / fuzzstep; + if (available % fuzzstep != 0) + next_wrap++; - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) + int cnt = MIN(count, next_wrap); + count -= cnt; + do { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + } while (--cnt); - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - 
uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; + fuzz %= FUZZTABLE; } } }; From 0f0859b0b2d8f82c89ea1674b6ecc999934ae659 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 22:54:23 +0200 Subject: [PATCH 37/94] Special colormap support for when no hw accel is available --- src/r_draw.h | 18 +++++ src/r_draw_rgba.cpp | 152 +++++++++++++++++++++++++++++++++++++++++++ src/r_main.cpp | 4 +- src/r_swrenderer.cpp | 8 +++ src/r_things.cpp | 2 +- 5 files changed, 181 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 55ad8a0ca..d09d0ab89 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -581,4 +581,22 @@ public: static void WaitForWorkers(); }; +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a5d924dfa..d5c275d0e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3488,6 +3488,158 @@ public: } }; +ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) +{ + buffer = screen->GetBuffer(); + pitch = screen->GetPitch(); + width = screen->GetWidth(); + height = screen->GetHeight(); + + start_red = (int)(colormap->ColorizeStart[0] * 255); + start_green = (int)(colormap->ColorizeStart[1] * 255); + start_blue = (int)(colormap->ColorizeStart[2] * 255); + 
end_red = (int)(colormap->ColorizeEnd[0] * 255); + end_green = (int)(colormap->ColorizeEnd[1] * 255); + end_blue = (int)(colormap->ColorizeEnd[2] * 255); +} + +#ifdef NO_SSE +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + for (int x = 0; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + y += thread->num_cores; + count--; + } +} +#else +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); + __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + int sse_length = width / 4; + for (int x = 0; x < sse_length; x++) + { + // Unpack to integers: + __m128i p = _mm_loadu_si128((const __m128i*)pixels); + + __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); + __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); + + // Add gray weighting to colors + __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); + __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); + __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_1 = 
_mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); + __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); + + // Transpose to get color components in individual vectors: + __m128 tmpx = _mm_castsi128_ps(p32_0); + __m128 tmpy = _mm_castsi128_ps(p32_1); + __m128 tmpz = _mm_castsi128_ps(p32_2); + __m128 tmpw = _mm_castsi128_ps(p32_3); + _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); + __m128i blue = _mm_castps_si128(tmpx); + __m128i green = _mm_castps_si128(tmpy); + __m128i red = _mm_castps_si128(tmpz); + __m128i alpha = _mm_castps_si128(tmpw); + + // Calculate gray and 256-gray values: + __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); + __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); + + // p32 = start * inv_gray + end * gray: + __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); + __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); + __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); + __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); + __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); + __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); + __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); + __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); + __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); + __m128i 
gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); + __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); + __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); + p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); + p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); + p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); + p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); + + p16_0 = _mm_packs_epi32(p32_0, p32_1); + p16_1 = _mm_packs_epi32(p32_2, p32_3); + p = _mm_packus_epi16(p16_0, p16_1); + + _mm_storeu_si128((__m128i*)pixels, p); + pixels += 16; + } + + for (int x = sse_length * 4; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + + y += thread->num_cores; + count--; + } +} +#endif + ///////////////////////////////////////////////////////////////////////////// void R_BeginDrawerCommands() diff --git a/src/r_main.cpp b/src/r_main.cpp index c1b78303b..2eb0ce141 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -459,7 +459,7 @@ void R_SetupColormap(player_t *player) if (player->fixedcolormap >= 0 && player->fixedcolormap < 
(int)SpecialColormaps.Size()) { realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; - if (RenderTarget == screen && (DFrameBuffer *)screen->Accel2D && r_shadercolormaps) + if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) { // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the @@ -935,7 +935,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. - if (!r_shadercolormaps) + if (!r_shadercolormaps && !r_swtruecolor) { realfixedcolormap = NULL; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 11f879c38..c4558bf7c 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_shadercolormaps) class FArchive; void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, int trueratio); @@ -166,6 +167,13 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + DrawerCommandQueue::QueueCommand<ApplySpecialColormapRGBACommand>(realfixedcolormap, screen); + } + R_EndDrawerCommands(); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0c5e17b7c..2dc0bdb6c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1423,7 +1423,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } } - if (realfixedcolormap != NULL) + if (realfixedcolormap != NULL && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) { // fixed color vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; From cc10c2a97045010453c610e68e4a66cef6e36dd9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 03:16:48 +0200 Subject: [PATCH 38/94] Fix cameras and kdizd intro for true color mode --- src/r_main.cpp | 4 +-- src/r_swrenderer.cpp | 35 +++++++++++++++++++----- src/textures/canvastexture.cpp | 49 ++++++++++++++++++++++++++++++++-- src/textures/texture.cpp | 36 +++++++++++++++++++++++++ src/textures/textures.h | 18 +++++++++---- 5 files changed, 126 insertions(+), 16 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 2eb0ce141..4e5ff1dbd 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -974,6 +974,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -981,8 +983,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_EndDrawerCommands(); - viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4558bf7c..556323df5 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,11 +87,17 @@ void FSoftwareRenderer::PrecacheTexture(FTexture 
*tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - tex->GetColumn(0, &spanp); + /*if (r_swtruecolor) + tex->GetColumnBgra(0, &spanp); + else*/ + tex->GetColumn(0, &spanp); } else if (cache != 0) { - tex->GetPixels (); + if (r_swtruecolor) + tex->GetPixels(); + else + tex->GetPixels (); } else { @@ -328,8 +334,8 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { - BYTE *Pixels = const_cast(tex->GetPixels()); - DSimpleCanvas *Canvas = tex->GetCanvas(); + BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); + DSimpleCanvas *Canvas = r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas(); // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. @@ -340,13 +346,28 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin R_SetFOV ((double)fov); R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); - if (Pixels == Canvas->GetBuffer()) + + if (Canvas->IsBgra()) { - FTexture::FlipSquareBlockRemap (Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockBgra((uint32_t*)Pixels, tex->GetWidth(), tex->GetHeight()); + } + else + { + FTexture::FlipNonSquareBlockBgra((uint32_t*)Pixels, (const uint32_t*)Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch()); + } } else { - FTexture::FlipNonSquareBlockRemap (Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockRemap(Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + } + else + { + 
FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + } } tex->SetUpdated(); fixedcolormap = savecolormap; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7242149a4..a72546d78 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -53,7 +53,6 @@ FCanvasTexture::FCanvasTexture (const char *name, int width, int height) DummySpans[1].TopOffset = 0; DummySpans[1].Length = 0; UseType = TEX_Wall; - Canvas = NULL; bNeedsUpdate = true; bDidUpdate = false; bHasCanvas = true; @@ -101,6 +100,16 @@ const BYTE *FCanvasTexture::GetPixels () return Pixels; } +const uint32_t *FCanvasTexture::GetPixelsBgra() +{ + bNeedsUpdate = true; + if (CanvasBgra == NULL) + { + MakeTextureBgra(); + } + return PixelsBgra; +} + void FCanvasTexture::MakeTexture () { Canvas = new DSimpleCanvas (Width, Height, false); @@ -123,21 +132,57 @@ void FCanvasTexture::MakeTexture () memset (Pixels+Width*Height/2, 255, Width*Height/2); } +void FCanvasTexture::MakeTextureBgra() +{ + CanvasBgra = new DSimpleCanvas(Width, Height, true); + CanvasBgra->Lock(); + GC::AddSoftRoot(CanvasBgra); + + if (Width != Height || Width != CanvasBgra->GetPitch()) + { + PixelsBgra = new uint32_t[Width*Height]; + bPixelsAllocatedBgra = true; + } + else + { + PixelsBgra = (uint32_t*)CanvasBgra->GetBuffer(); + bPixelsAllocatedBgra = false; + } + + // Draw a special "unrendered" initial texture into the buffer. 
+ memset(PixelsBgra, 0, Width*Height / 2 * 4); + memset(PixelsBgra + Width*Height / 2, 255, Width*Height / 2 * 4); +} + void FCanvasTexture::Unload () { if (bPixelsAllocated) { - if (Pixels != NULL) delete [] Pixels; + if (Pixels != NULL) delete[] Pixels; bPixelsAllocated = false; Pixels = NULL; } + if (bPixelsAllocatedBgra) + { + if (PixelsBgra != NULL) delete[] PixelsBgra; + bPixelsAllocatedBgra = false; + PixelsBgra = NULL; + } + if (Canvas != NULL) { GC::DelSoftRoot(Canvas); Canvas->Destroy(); Canvas = NULL; } + + if (CanvasBgra != NULL) + { + GC::DelSoftRoot(CanvasBgra); + CanvasBgra->Destroy(); + CanvasBgra = NULL; + } } bool FCanvasTexture::CheckModified () diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index d50081062..28a3b9333 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -410,6 +410,29 @@ void FTexture::FlipSquareBlock (BYTE *block, int x, int y) } } +void FTexture::FlipSquareBlockBgra(uint32_t *block, int x, int y) +{ + int i, j; + + if (x != y) return; + + for (i = 0; i < x; ++i) + { + uint32_t *corner = block + x*i + i; + int count = x - i; + if (count & 1) + { + count--; + swapvalues(corner[count], corner[count*x]); + } + for (j = 0; j < count; j += 2) + { + swapvalues(corner[j], corner[j*x]); + swapvalues(corner[j + 1], corner[(j + 1)*x]); + } + } +} + void FTexture::FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap) { int i, j; @@ -453,6 +476,19 @@ void FTexture::FlipNonSquareBlock (BYTE *dst, const BYTE *src, int x, int y, int } } +void FTexture::FlipNonSquareBlockBgra(uint32_t *dst, const uint32_t *src, int x, int y, int srcpitch) +{ + int i, j; + + for (i = 0; i < x; ++i) + { + for (j = 0; j < y; ++j) + { + dst[i*y + j] = src[i + j*srcpitch]; + } + } +} + void FTexture::FlipNonSquareBlockRemap (BYTE *dst, const BYTE *src, int x, int y, int srcpitch, const BYTE *remap) { int i, j; diff --git a/src/textures/textures.h b/src/textures/textures.h index 0d066eff5..872c83b1a 100644 --- 
a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,8 +274,10 @@ private: public: static void FlipSquareBlock (BYTE *block, int x, int y); + static void FlipSquareBlockBgra (uint32_t *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); static void FlipNonSquareBlock (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch); + static void FlipNonSquareBlockBgra (uint32_t *blockto, const uint32_t *blockfrom, int x, int y, int srcpitch); static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap); friend class D3DTex; @@ -518,21 +520,27 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); void NeedUpdate() { bNeedsUpdate=true; } void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } + DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } void MakeTexture (); + void MakeTextureBgra (); protected: - DSimpleCanvas *Canvas; - BYTE *Pixels; + DSimpleCanvas *Canvas = nullptr; + DSimpleCanvas *CanvasBgra = nullptr; + BYTE *Pixels = nullptr; + uint32_t *PixelsBgra = nullptr; Span DummySpans[2]; - bool bNeedsUpdate; - bool bDidUpdate; - bool bPixelsAllocated; + bool bNeedsUpdate = true; + bool bDidUpdate = false; + bool bPixelsAllocated = false; + bool bPixelsAllocatedBgra = false; public: bool bFirstUpdate; From e31331bed265925a2e03d66658863e9c26f2ca26 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 19:09:48 +0200 Subject: [PATCH 39/94] Sloped plane adjustments --- src/r_draw.cpp | 4 +- src/r_plane.cpp | 126 +++++------------------------------------------- 2 files changed, 14 insertions(+), 116 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 70b3893f4..ec7313c4f 100644 --- a/src/r_draw.cpp +++ 
b/src/r_draw.cpp @@ -2325,7 +2325,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapColoredPlane_RGBA; + R_MapTiltedPlane = R_MapTiltedPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -2422,7 +2422,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapColoredPlane_C; + R_MapTiltedPlane = R_MapTiltedPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 05fce79a6..1cde16071 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,124 +480,22 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_RGBA (int y, int x1) { int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - uint32_t *fb; - DWORD u, v; - int i; - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) + uint32_t *source = (uint32_t*)ds_source; + int source_width = 1 << ds_xbits; + int source_height = 1 << ds_ybits; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); + *(dest++) = source[0]; + count--; } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif } //========================================================================== From 3ce2d8365dd6a91c068a20b0caf4b683634ceba3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 20:01:31 +0200 Subject: [PATCH 40/94] Fix HUD colors when hw2d is off --- src/r_draw_rgba.cpp | 9 ++++++--- src/r_drawt_rgba.cpp | 19 +++++++++++++------ src/v_draw.cpp | 13 +++++++++---- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d5c275d0e..7e9f85117 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -325,11 +325,11 @@ public: { int pitch = dc_pitch * thread->num_cores; - BYTE color = dc_color; + uint32_t color = shade_pal_index_simple(dc_color, light); do { - *dest = shade_pal_index_simple(color, light); + *dest = color; dest += pitch; } while (--count); } @@ -629,6 +629,7 @@ class DrawAddColumnRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: DrawAddColumnRGBACommand() @@ -643,6 +644,7 @@ public: dc_shade_constants = 
::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -667,13 +669,14 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 5f0fc4156..cd124ac63 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -512,6 +512,7 @@ class RtAdd1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -527,6 +528,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -548,12 +550,13 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -583,6 +586,7 @@ class RtAdd4colsRGBACommand : public DrawerCommand int dc_pitch; fixed_t dc_light; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) 
@@ -595,6 +599,7 @@ public: dc_pitch = ::dc_pitch; dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -617,6 +622,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -624,7 +630,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -664,6 +670,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -678,10 +685,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c2dbf31c5..d03853c11 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1095,9 +1095,10 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) - spot[i] = palColor; + spot[i] = fillColor; } else { @@ -1108,11 +1109,12 @@ void 
DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // vertical line if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); do { - *spot = palColor; + *spot = fillColor; spot += pitch; } while (--deltaY != 0); } @@ -1131,11 +1133,12 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // diagonal line. if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { - *spot = palColor; + *spot = fillColor; spot += advance; } while (--deltaY != 0); } @@ -1299,12 +1302,14 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin if (IsBgra()) { + uint32_t fill_color = GPalette.BaseColors[palcolor]; + uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; for (y = top; y < bottom; y++) { for (int i = 0; i < x; i++) - dest[i] = palcolor; + dest[i] = fill_color; dest += Pitch; } } From 8ba6a4f17501e34db5b644567fcd40e06502017c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 21:39:55 +0200 Subject: [PATCH 41/94] Precache, Unload and FillSimplePoly bug fix --- src/g_strife/strife_sbar.cpp | 5 ----- src/menu/playerdisplay.cpp | 5 ----- src/r_swrenderer.cpp | 4 ++-- src/textures/automaptexture.cpp | 1 + src/textures/buildtexture.cpp | 12 ------------ src/textures/canvastexture.cpp | 2 ++ src/textures/ddstexture.cpp | 1 + src/textures/flattexture.cpp | 1 + src/textures/imgztexture.cpp | 1 + src/textures/jpegtexture.cpp | 3 +-- src/textures/multipatchtexture.cpp | 1 + src/textures/patchtexture.cpp | 1 + src/textures/pcxtexture.cpp | 1 + src/textures/pngtexture.cpp | 3 +-- src/textures/rawpagetexture.cpp | 1 + src/textures/texture.cpp | 20 ++++++++++++-------- src/textures/textures.h | 6 ++---- src/textures/tgatexture.cpp | 1 + 
src/textures/warptexture.cpp | 1 + src/v_draw.cpp | 2 +- src/v_font.cpp | 2 ++ src/v_video.cpp | 11 ----------- 22 files changed, 33 insertions(+), 52 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index bcdf624d7..eb3fa2608 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -35,7 +35,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); bool CheckModified (); - void Unload (); void SetVial (int level); @@ -90,10 +89,6 @@ bool FHealthBar::CheckModified () return NeedRefresh; } -void FHealthBar::Unload () -{ -} - const BYTE *FHealthBar::GetColumn (unsigned int column, const Span **spans_out) { if (NeedRefresh) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index c3d11a43a..16671975a 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); protected: @@ -212,10 +211,6 @@ bool FBackdropTexture::CheckModified() return LastRenderTic != gametic; } -void FBackdropTexture::Unload() -{ -} - //============================================================================= // // diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 556323df5..5be41660e 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,9 +87,9 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - /*if (r_swtruecolor) + if (r_swtruecolor) tex->GetColumnBgra(0, &spanp); - else*/ + else tex->GetColumn(0, &spanp); } else if (cache != 0) diff --git a/src/textures/automaptexture.cpp b/src/textures/automaptexture.cpp index 67d68b9fe..9aac379ef 100644 --- a/src/textures/automaptexture.cpp +++ b/src/textures/automaptexture.cpp @@ -122,6 +122,7 @@ void FAutomapTexture::Unload () delete[] 
Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/buildtexture.cpp b/src/textures/buildtexture.cpp index bfcc6333d..1155dacc4 100644 --- a/src/textures/buildtexture.cpp +++ b/src/textures/buildtexture.cpp @@ -56,7 +56,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); protected: const BYTE *Pixels; @@ -103,17 +102,6 @@ FBuildTexture::~FBuildTexture () // //========================================================================== -void FBuildTexture::Unload () -{ - // Nothing to do, since the pixels are accessed from memory-mapped files directly -} - -//========================================================================== -// -// -// -//========================================================================== - const BYTE *FBuildTexture::GetPixels () { return Pixels; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index a72546d78..109d927ab 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -183,6 +183,8 @@ void FCanvasTexture::Unload () CanvasBgra->Destroy(); CanvasBgra = NULL; } + + FTexture::Unload(); } bool FCanvasTexture::CheckModified () diff --git a/src/textures/ddstexture.cpp b/src/textures/ddstexture.cpp index 31e748022..fb4de34c5 100644 --- a/src/textures/ddstexture.cpp +++ b/src/textures/ddstexture.cpp @@ -401,6 +401,7 @@ void FDDSTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/flattexture.cpp b/src/textures/flattexture.cpp index 840d53aaf..08e0d1221 100644 --- a/src/textures/flattexture.cpp +++ b/src/textures/flattexture.cpp @@ -138,6 +138,7 @@ void FFlatTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/imgztexture.cpp b/src/textures/imgztexture.cpp index 1c262d707..04932d4bf 100644 --- a/src/textures/imgztexture.cpp +++ b/src/textures/imgztexture.cpp @@ -142,6 +142,7 @@ void FIMGZTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index a37eff6c3..3b5359846 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -196,7 +196,6 @@ public: protected: BYTE *Pixels; - std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); @@ -300,7 +299,7 @@ void FJPEGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/multipatchtexture.cpp b/src/textures/multipatchtexture.cpp index b0db481a8..6ae45c785 100644 --- a/src/textures/multipatchtexture.cpp +++ b/src/textures/multipatchtexture.cpp @@ -362,6 +362,7 @@ void FMultiPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/patchtexture.cpp b/src/textures/patchtexture.cpp index 423ce4deb..8388515c0 100644 --- a/src/textures/patchtexture.cpp +++ b/src/textures/patchtexture.cpp @@ -184,6 +184,7 @@ void FPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/pcxtexture.cpp b/src/textures/pcxtexture.cpp index 0ec5d2933..42a13b85a 100644 --- a/src/textures/pcxtexture.cpp +++ b/src/textures/pcxtexture.cpp @@ -191,6 +191,7 @@ void FPCXTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 95f7aca75..206797a34 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -67,7 +67,6 @@ protected: FString SourceFile; BYTE *Pixels; - std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -382,7 +381,7 @@ void FPNGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/rawpagetexture.cpp b/src/textures/rawpagetexture.cpp index 1402f8844..69313fd1c 100644 --- a/src/textures/rawpagetexture.cpp +++ b/src/textures/rawpagetexture.cpp @@ -206,6 +206,7 @@ void FRawPageTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 28a3b9333..0030719cb 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -176,6 +176,11 @@ FTexture::~FTexture () KillNative(); } +void FTexture::Unload() +{ + PixelsBgra = std::vector(); +} + const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) { const uint32_t *pixels = GetPixelsBgra(); @@ -189,16 +194,19 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (BgraPixels.empty()) + if (PixelsBgra.empty()) { + GetColumn(0, nullptr); const BYTE *indices = GetPixels(); - BgraPixels.resize(Width * Height); + if (indices == nullptr) + return nullptr; + PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; } } - return BgraPixels.data(); + return PixelsBgra.data(); } bool FTexture::CheckModified () @@ -642,10 
+650,6 @@ FDummyTexture::FDummyTexture () UseType = TEX_Null; } -void FDummyTexture::Unload () -{ -} - void FDummyTexture::SetSize (int width, int height) { Width = width; diff --git a/src/textures/textures.h b/src/textures/textures.h index 872c83b1a..38d1ef487 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -192,7 +192,7 @@ public: virtual FTexture *GetRedirect(bool wantwarped); virtual FTexture *GetRawTexture(); // for FMultiPatchTexture to override - virtual void Unload () = 0; + virtual void Unload (); // Returns the native pixel format for this image virtual FTextureFormat GetFormat(); @@ -269,8 +269,7 @@ protected: Rotations = other->Rotations; } -private: - std::vector BgraPixels; + std::vector PixelsBgra; public: static void FlipSquareBlock (BYTE *block, int x, int y); @@ -472,7 +471,6 @@ public: FDummyTexture (); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); void SetSize (int width, int height); }; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index b208a51a3..5e76a63b2 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -181,6 +181,7 @@ void FTGATexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index a8a2ddb9e..b6977dd77 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -74,6 +74,7 @@ void FWarpTexture::Unload () Spans = NULL; } SourcePic->Unload (); + FTexture::Unload(); } bool FWarpTexture::CheckModified () diff --git a/src/v_draw.cpp b/src/v_draw.cpp index d03853c11..02ba591b6 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1403,7 +1403,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else 
R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); diff --git a/src/v_font.cpp b/src/v_font.cpp index 052074d11..ef9b69dd1 100644 --- a/src/v_font.cpp +++ b/src/v_font.cpp @@ -1662,6 +1662,7 @@ void FFontChar1::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== @@ -1723,6 +1724,7 @@ void FFontChar2::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/v_video.cpp b/src/v_video.cpp index e58638121..2cf04a29d 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -117,7 +117,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); void SetTranslation(int num); @@ -1076,16 +1075,6 @@ void FPaletteTester::SetTranslation(int num) } } -//========================================================================== -// -// FPaletteTester :: Unload -// -//========================================================================== - -void FPaletteTester::Unload() -{ -} - //========================================================================== // // FPaletteTester :: GetColumn From 69b2fa72e86b180351a70a95243c1b7484f8cec9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:10:54 +0200 Subject: [PATCH 42/94] Moved RGBA draw stuff to its own header file --- src/r_draw.cpp | 91 +++++++------- src/r_draw.h | 239 ------------------------------------- src/r_draw_rgba.cpp | 39 +++--- src/r_draw_rgba.h | 276 +++++++++++++++++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 95 +++++++-------- src/r_main.cpp | 1 + 
src/r_swrenderer.cpp | 1 + src/r_things.cpp | 1 + 8 files changed, 393 insertions(+), 350 deletions(-) create mode 100644 src/r_draw_rgba.h diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec7313c4f..552e5ff13 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -39,6 +39,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -2295,34 +2296,34 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; + R_DrawColumn = R_DrawColumnP_RGBA; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; + R_DrawSpan = R_DrawSpanP_RGBA; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddColumn = R_FillAddColumn_RGBA; R_FillAddClampColumn = R_FillAddClampColumn_RGBA; R_FillSubClampColumn = R_FillSubClampColumn_RGBA; R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - 
R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_DrawAddColumn = R_DrawAddColumnP_RGBA; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; R_FillSpan = R_FillSpan_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; R_MapTiltedPlane = R_MapTiltedPlane_RGBA; @@ -2338,30 +2339,30 @@ void R_InitColumnDrawers () tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = 
rt_tlaterevsubclamp1col_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_copy1col = rt_copy1col_RGBA; + rt_copy4cols = rt_copy4cols_RGBA; + rt_map1col = rt_map1col_RGBA; + rt_map4cols = rt_map4cols_RGBA; + rt_shaded1col = rt_shaded1col_RGBA; + rt_shaded4cols = rt_shaded4cols_RGBA; + rt_add1col = rt_add1col_RGBA; + rt_add4cols = rt_add4cols_RGBA; + rt_addclamp1col = rt_addclamp1col_RGBA; + rt_addclamp4cols = rt_addclamp4cols_RGBA; + rt_subclamp1col = rt_subclamp1col_RGBA; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA; + rt_tlate1col = rt_tlate1col_RGBA; + rt_tlateadd1col = rt_tlateadd1col_RGBA; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; + rt_subclamp4cols = rt_subclamp4cols_RGBA; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; + rt_tlate4cols = rt_tlate4cols_RGBA; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; diff --git a/src/r_draw.h b/src/r_draw.h index d09d0ab89..cea05e469 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,11 +24,6 @@ #define __R_DRAW__ #include "r_defs.h" -#include -#include -#include -#include -#include // Spectre/Invisibility. 
#define FUZZTABLE 50 @@ -175,39 +170,6 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); - -/// - -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA_c (int sx, int yl, int yh); - -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); - -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); - -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); - } extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); @@ -247,10 +209,8 @@ void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
void rt_initcols_pal (BYTE *buffer); -void rt_initcols_rgba (BYTE *buffer); void rt_span_coverage_pal(int x, int start, int stop); -void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -280,46 +240,6 @@ void R_DrawSpanMaskedP_C (void); #endif -void R_DrawColumnHorizP_RGBA_C (void); -void R_DrawColumnP_RGBA_C (void); -void R_DrawFuzzColumnP_RGBA_C (void); -void R_DrawTranslatedColumnP_RGBA_C (void); -void R_DrawShadedColumnP_RGBA_C (void); -void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanMaskedP_RGBA_C (void); - -void R_DrawSpanTranslucentP_RGBA_C(); -void R_DrawSpanMaskedTranslucentP_RGBA_C(); -void R_DrawSpanAddClampP_RGBA_C(); -void R_DrawSpanMaskedAddClampP_RGBA_C(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA_C(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA_C(); -void R_DrawTlatedAddColumnP_RGBA_C(); -void R_DrawAddClampColumnP_RGBA_C(); -void R_DrawAddClampTranslatedColumnP_RGBA_C(); -void R_DrawSubClampColumnP_RGBA_C(); -void R_DrawSubClampTranslatedColumnP_RGBA_C(); -void R_DrawRevSubClampColumnP_RGBA_C(); -void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); -void R_FillSpan_RGBA(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); -void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -348,9 +268,6 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -void R_FillColumnHorizP_RGBA_C(void); -void R_FillSpan_RGBA_C(void); - #ifdef 
X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -443,160 +360,4 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -class DrawerCommandQueue; - -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -class DrawerCommand -{ -protected: - int dc_dest_y; - -public: - DrawerCommand() - { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; 
- -class DrawerCommandQueue -{ - enum { memorypool_size = 4 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; - - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. 
- static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; - -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; -}; - #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7e9f85117..a9dd2db32 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -37,6 +37,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -3655,7 +3656,7 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA_C() +void R_DrawColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3665,7 +3666,7 @@ void R_FillColumnP_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA_C() +void R_FillAddColumn_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3685,88 +3686,88 @@ void R_FillRevSubClampColumn_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA_C() +void R_DrawFuzzColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA_C() +void R_DrawAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA_C() +void R_DrawTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA_C() +void R_DrawTlatedAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA_C() +void R_DrawShadedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA_C() +void 
R_DrawAddClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA_C() +void R_DrawAddClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA_C() +void R_DrawSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA_C() +void R_DrawSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA_C() +void R_DrawRevSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +void R_DrawRevSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA_C() +void R_DrawSpanP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA_C() +void R_DrawSpanMaskedP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA_C() +void R_DrawSpanTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA_C() +void R_DrawSpanMaskedTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA_C() +void R_DrawSpanAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA_C() +void R_DrawSpanMaskedAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h new file mode 100644 index 000000000..5d7402634 --- /dev/null +++ b/src/r_draw_rgba.h @@ -0,0 +1,276 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAW_RGBA__ +#define __R_DRAW_RGBA__ + +#include "r_draw.h" +#include <vector> +#include <memory> +#include <thread> +#include <mutex> +#include <condition_variable> + +///////////////////////////////////////////////////////////////////////////// +// Drawer functions: + +void rt_initcols_rgba(BYTE *buffer); +void rt_span_coverage_rgba(int x, int start, int stop); + +void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA(int sx, int yl, int yh); +void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA(int sx, int yl, int yh); +void rt_map1col_RGBA(int hx, int sx, int yl, int yh); +void rt_add1col_RGBA(int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_map4cols_RGBA(int sx, int yl, int yh); +void rt_add4cols_RGBA(int sx, int yl, int yh); +void rt_addclamp4cols_RGBA(int sx, int yl, int yh); +void rt_subclamp4cols_RGBA(int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlate4cols_RGBA(int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int
yh); +void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); + +void R_DrawColumnHorizP_RGBA(); +void R_DrawColumnP_RGBA(); +void R_DrawFuzzColumnP_RGBA(); +void R_DrawTranslatedColumnP_RGBA(); +void R_DrawShadedColumnP_RGBA(); + +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA(); +void R_DrawTlatedAddColumnP_RGBA(); +void R_DrawAddClampColumnP_RGBA(); +void R_DrawAddClampTranslatedColumnP_RGBA(); +void R_DrawSubClampColumnP_RGBA(); +void R_DrawSubClampTranslatedColumnP_RGBA(); +void R_DrawRevSubClampColumnP_RGBA(); +void R_DrawRevSubClampTranslatedColumnP_RGBA(); + +void R_DrawSpanP_RGBA(void); +void R_DrawSpanMaskedP_RGBA(void); +void R_DrawSpanTranslucentP_RGBA(); +void R_DrawSpanMaskedTranslucentP_RGBA(); +void R_DrawSpanAddClampP_RGBA(); +void R_DrawSpanMaskedAddClampP_RGBA(); +void R_FillSpan_RGBA(); + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); + +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); + +void R_FillColumnHorizP_RGBA(); +void R_FillSpan_RGBA(); + +///////////////////////////////////////////////////////////////////////////// +// Multithreaded rendering infrastructure: + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +struct FSpecialColormap; +class DrawerCommandQueue; + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active 
threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int dc_dest_y; + +public: + DrawerCommand() + { + dc_dest_y = static_cast<int>((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector<DrawerCommand *> commands; + + std::vector<DrawerThread> threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector<DrawerCommand *> active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads
= 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template<typename T, typename... Types> + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0) + { + T command(std::forward<Types>(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward<Types>(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; + +///////////////////////////////////////////////////////////////////////////// +// Drawer commands: + +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + +#endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index cd124ac63..32d5080c5 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -42,6 +42,7 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#include "r_draw_rgba.h" #ifndef NO_SSE #include <emmintrin.h> #endif @@ -1628,171 +1629,171 @@ public:
///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +void rt_copy4cols_RGBA (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA_c(0, sx, yl, yh); - rt_copy1col_RGBA_c(1, sx + 1, yl, yh); - rt_copy1col_RGBA_c(2, sx + 2, yl, yh); - rt_copy1col_RGBA_c(3, sx + 3, yl, yh); + rt_copy1col_RGBA(0, sx, yl, yh); + rt_copy1col_RGBA(1, sx + 1, yl, yh); + rt_copy1col_RGBA(2, sx + 2, yl, yh); + rt_copy1col_RGBA(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_map1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. -void rt_map4cols_RGBA_c (int sx, int yl, int yh) +void rt_map4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
-void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlate4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_add1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA_c (int sx, int yl, int yh) +void rt_add4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
-void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +void rt_shaded4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_addclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_subclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. 
-void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_subclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_subclamp4cols_RGBA(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
-void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1815,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_RGBA_C (void) +void R_DrawColumnHorizP_RGBA (void) { if (dc_count <= 0) return; @@ -1830,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA_C (void) +void R_FillColumnHorizP_RGBA (void) { if (dc_count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index 4e5ff1dbd..247a98125 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -40,6 +40,7 @@ #include "r_segs.h" #include "r_3dfloors.h" #include "r_sky.h" +#include "r_draw_rgba.h" #include "st_stuff.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be41660e..fbbd65b17 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,6 +42,7 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_draw_rgba.h" EXTERN_CVAR(Bool, r_shadercolormaps) diff --git a/src/r_things.cpp b/src/r_things.cpp index 2dc0bdb6c..f1f29f160 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,6 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw_rgba.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" From 586d5cdf1eb5609fcd480aa0d69fc764c4fc0103 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:33:52 +0200 Subject: [PATCH 43/94] Normalize naming 
convention a little --- src/r_draw.cpp | 136 +++++++++++++++++++++---------------------- src/r_draw_rgba.cpp | 80 ++++++++++++------------- src/r_draw_rgba.h | 128 ++++++++++++++++++++-------------------- src/r_drawt_rgba.cpp | 94 +++++++++++++++--------------- src/r_plane.cpp | 4 +- src/r_plane.h | 4 +- src/r_things.cpp | 2 +- src/r_things.h | 2 +- 8 files changed, 225 insertions(+), 225 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 552e5ff13..ecb4441f8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2296,82 +2296,82 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; - R_DrawColumn = R_DrawColumnP_RGBA; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; - R_DrawSpan = R_DrawSpanP_RGBA; + R_DrawColumnHoriz = R_DrawColumnHoriz_rgba; + R_DrawColumn = R_DrawColumn_rgba; + R_DrawFuzzColumn = R_DrawFuzzColumn_rgba; + R_DrawTranslatedColumn = R_DrawTranslatedColumn_rgba; + R_DrawShadedColumn = R_DrawShadedColumn_rgba; + R_DrawSpanMasked = R_DrawSpanMasked_rgba; + R_DrawSpan = R_DrawSpan_rgba; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; - 
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA; + R_DrawSpanTranslucent = R_DrawSpanTranslucent_rgba; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucent_rgba; + R_DrawSpanAddClamp = R_DrawSpanAddClamp_rgba; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClamp_rgba; + R_FillColumn = R_FillColumn_rgba; + R_FillAddColumn = R_FillAddColumn_rgba; + R_FillAddClampColumn = R_FillAddClampColumn_rgba; + R_FillSubClampColumn = R_FillSubClampColumn_rgba; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba; + R_DrawAddColumn = R_DrawAddColumn_rgba; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba; + R_DrawAddClampColumn = R_DrawAddClampColumn_rgba; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba; + R_DrawSubClampColumn = R_DrawSubClampColumn_rgba; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba; + R_FillSpan = R_FillSpan_rgba; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_FillColumnHoriz = R_FillColumnHoriz_rgba; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapTiltedPlane_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = R_DrawParticle_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_MapTiltedPlane = R_MapTiltedPlane_rgba; + R_MapColoredPlane = R_MapColoredPlane_rgba; + R_DrawParticle = R_DrawParticle_rgba; - tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = 
tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + tmvline1_add = tmvline1_add_rgba; + tmvline4_add = tmvline4_add_rgba; + tmvline1_addclamp = tmvline1_addclamp_rgba; + tmvline4_addclamp = tmvline4_addclamp_rgba; + tmvline1_subclamp = tmvline1_subclamp_rgba; + tmvline4_subclamp = tmvline4_subclamp_rgba; + tmvline1_revsubclamp = tmvline1_revsubclamp_rgba; + tmvline4_revsubclamp = tmvline4_revsubclamp_rgba; - rt_copy1col = rt_copy1col_RGBA; - rt_copy4cols = rt_copy4cols_RGBA; - rt_map1col = rt_map1col_RGBA; - rt_map4cols = rt_map4cols_RGBA; - rt_shaded1col = rt_shaded1col_RGBA; - rt_shaded4cols = rt_shaded4cols_RGBA; - rt_add1col = rt_add1col_RGBA; - rt_add4cols = rt_add4cols_RGBA; - rt_addclamp1col = rt_addclamp1col_RGBA; - rt_addclamp4cols = rt_addclamp4cols_RGBA; - rt_subclamp1col = rt_subclamp1col_RGBA; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA; - rt_tlate1col = rt_tlate1col_RGBA; - rt_tlateadd1col = rt_tlateadd1col_RGBA; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; - rt_subclamp4cols = rt_subclamp4cols_RGBA; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; - rt_tlate4cols = rt_tlate4cols_RGBA; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; + rt_copy1col = rt_copy1col_rgba; + rt_copy4cols = rt_copy4cols_rgba; + rt_map1col = rt_map1col_rgba; + rt_map4cols = rt_map4cols_rgba; + rt_shaded1col = rt_shaded1col_rgba; + rt_shaded4cols = rt_shaded4cols_rgba; + rt_add1col = rt_add1col_rgba; + rt_add4cols = rt_add4cols_rgba; + rt_addclamp1col = rt_addclamp1col_rgba; + rt_addclamp4cols = rt_addclamp4cols_rgba; + rt_subclamp1col = rt_subclamp1col_rgba; + rt_revsubclamp1col = rt_revsubclamp1col_rgba; + rt_tlate1col 
= rt_tlate1col_rgba; + rt_tlateadd1col = rt_tlateadd1col_rgba; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba; + rt_subclamp4cols = rt_subclamp4cols_rgba; + rt_revsubclamp4cols = rt_revsubclamp4cols_rgba; + rt_tlate4cols = rt_tlate4cols_rgba; + rt_tlateadd4cols = rt_tlateadd4cols_rgba; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - domvline1 = mvlinec1_RGBA; + dovline1 = vlinec1_rgba; + doprevline1 = vlinec1_rgba; + domvline1 = mvlinec1_rgba; - dovline4 = vlinec4_RGBA; - domvline4 = mvlinec4_RGBA; + dovline4 = vlinec4_rgba; + domvline4 = mvlinec4_rgba; } else { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a9dd2db32..2062609b4 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3656,130 +3656,130 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA() +void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillColumnP_RGBA() +void R_FillColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA() +void R_FillAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddClampColumn_RGBA() +void R_FillAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSubClampColumn_RGBA() +void R_FillSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillRevSubClampColumn_RGBA() +void R_FillRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA() +void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA() +void R_DrawAddColumn_rgba() { 
DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA() +void R_DrawTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA() +void R_DrawTlatedAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA() +void R_DrawShadedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA() +void R_DrawAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA() +void R_DrawAddClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA() +void R_DrawSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA() +void R_DrawSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA() +void R_DrawRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA() +void R_DrawRevSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA() +void R_DrawSpan_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA() +void R_DrawSpanMasked_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA() +void R_DrawSpanTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA() +void R_DrawSpanMaskedTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA() +void R_DrawSpanAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA() +void R_DrawSpanMaskedAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSpan_RGBA() +void R_FillSpan_rgba() { DrawerCommandQueue::QueueCommand(); } //extern FTexture *rw_pic; // For the asserts below -DWORD vlinec1_RGBA() +DWORD vlinec1_rgba() { /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3792,79 
+3792,79 @@ DWORD vlinec1_RGBA() return dc_texturefrac + dc_count * dc_iscale; } -void vlinec4_RGBA() +void vlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -DWORD mvlinec1_RGBA() +DWORD mvlinec1_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void mvlinec4_RGBA() +void mvlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_add_RGBA() +fixed_t tmvline1_add_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_add_RGBA() +void tmvline4_add_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_addclamp_RGBA() +fixed_t tmvline1_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_addclamp_RGBA() +void tmvline4_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_subclamp_RGBA() +fixed_t tmvline1_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_subclamp_RGBA() +void tmvline4_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_revsubclamp_RGBA() +fixed_t tmvline1_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_revsubclamp_RGBA() +void tmvline4_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) { for (; y < y2; ++y) { @@ -3873,7 +3873,7 @@ void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) } } -void 
R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) { // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis @@ -3913,7 +3913,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); + R_DrawFogBoundarySection_rgba(t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; @@ -3965,6 +3965,6 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) } if (t2 < b2 && rcolormap != 0) { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); + R_DrawFogBoundarySection_rgba(t2, b2, x1); } } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 5d7402634..9f07ff0bf 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -36,76 +36,76 @@ void rt_initcols_rgba(BYTE *buffer); void rt_span_coverage_rgba(int x, int start, int stop); -void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA(int sx, int yl, int yh); -void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA(int sx, int yl, int yh); -void rt_map1col_RGBA(int hx, int sx, int yl, int yh); -void rt_add1col_RGBA(int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_map4cols_RGBA(int sx, int yl, int yh); -void rt_add4cols_RGBA(int sx, int yl, int 
yh); -void rt_addclamp4cols_RGBA(int sx, int yl, int yh); -void rt_subclamp4cols_RGBA(int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlate4cols_RGBA(int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_copy1col_rgba(int hx, int sx, int yl, int yh); +void rt_copy4cols_rgba(int sx, int yl, int yh); +void rt_shaded1col_rgba(int hx, int sx, int yl, int yh); +void rt_shaded4cols_rgba(int sx, int yl, int yh); +void rt_map1col_rgba(int hx, int sx, int yl, int yh); +void rt_add1col_rgba(int hx, int sx, int yl, int yh); +void rt_addclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_subclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlate1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_map4cols_rgba(int sx, int yl, int yh); +void rt_add4cols_rgba(int sx, int yl, int yh); +void rt_addclamp4cols_rgba(int sx, int yl, int yh); +void rt_subclamp4cols_rgba(int sx, int yl, int yh); +void rt_revsubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlate4cols_rgba(int sx, int yl, int yh); +void rt_tlateadd4cols_rgba(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_rgba(int sx, int yl, int yh); -void R_DrawColumnHorizP_RGBA(); -void R_DrawColumnP_RGBA(); -void R_DrawFuzzColumnP_RGBA(); -void R_DrawTranslatedColumnP_RGBA(); -void R_DrawShadedColumnP_RGBA(); +void 
R_DrawColumnHoriz_rgba(); +void R_DrawColumn_rgba(); +void R_DrawFuzzColumn_rgba(); +void R_DrawTranslatedColumn_rgba(); +void R_DrawShadedColumn_rgba(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA(); -void R_DrawTlatedAddColumnP_RGBA(); -void R_DrawAddClampColumnP_RGBA(); -void R_DrawAddClampTranslatedColumnP_RGBA(); -void R_DrawSubClampColumnP_RGBA(); -void R_DrawSubClampTranslatedColumnP_RGBA(); -void R_DrawRevSubClampColumnP_RGBA(); -void R_DrawRevSubClampTranslatedColumnP_RGBA(); +void R_FillColumn_rgba(); +void R_FillAddColumn_rgba(); +void R_FillAddClampColumn_rgba(); +void R_FillSubClampColumn_rgba(); +void R_FillRevSubClampColumn_rgba(); +void R_DrawAddColumn_rgba(); +void R_DrawTlatedAddColumn_rgba(); +void R_DrawAddClampColumn_rgba(); +void R_DrawAddClampTranslatedColumn_rgba(); +void R_DrawSubClampColumn_rgba(); +void R_DrawSubClampTranslatedColumn_rgba(); +void R_DrawRevSubClampColumn_rgba(); +void R_DrawRevSubClampTranslatedColumn_rgba(); -void R_DrawSpanP_RGBA(void); -void R_DrawSpanMaskedP_RGBA(void); -void R_DrawSpanTranslucentP_RGBA(); -void R_DrawSpanMaskedTranslucentP_RGBA(); -void R_DrawSpanAddClampP_RGBA(); -void R_DrawSpanMaskedAddClampP_RGBA(); -void R_FillSpan_RGBA(); +void R_DrawSpan_rgba(void); +void R_DrawSpanMasked_rgba(void); +void R_DrawSpanTranslucent_rgba(); +void R_DrawSpanMaskedTranslucent_rgba(); +void R_DrawSpanAddClamp_rgba(); +void R_DrawSpanMaskedAddClamp_rgba(); +void R_FillSpan_rgba(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); 
-void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_rgba(); +void vlinec4_rgba(); +DWORD mvlinec1_rgba(); +void mvlinec4_rgba(); +fixed_t tmvline1_add_rgba(); +void tmvline4_add_rgba(); +fixed_t tmvline1_addclamp_rgba(); +void tmvline4_addclamp_rgba(); +fixed_t tmvline1_subclamp_rgba(); +void tmvline4_subclamp_rgba(); +fixed_t tmvline1_revsubclamp_rgba(); +void tmvline4_revsubclamp_rgba(); -void R_FillColumnHorizP_RGBA(); -void R_FillSpan_RGBA(); +void R_FillColumnHoriz_rgba(); +void R_FillSpan_rgba(); ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 32d5080c5..8f6d2ca13 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1629,171 +1629,171 @@ public: ///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) +void rt_copy1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA (int sx, int yl, int yh) +void rt_copy4cols_rgba (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA(0, sx, yl, yh); - rt_copy1col_RGBA(1, sx + 1, yl, yh); - rt_copy1col_RGBA(2, sx + 2, yl, yh); - rt_copy1col_RGBA(3, sx + 3, yl, yh); + rt_copy1col_rgba(0, sx, yl, yh); + rt_copy1col_rgba(1, sx + 1, yl, yh); + rt_copy1col_rgba(2, sx + 2, yl, yh); + rt_copy1col_rgba(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA (int hx, int sx, int yl, int yh) +void rt_map1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA (int sx, int yl, int yh) +void rt_map4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. -void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA (int sx, int yl, int yh) +void rt_tlate4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA (int hx, int sx, int yl, int yh) +void rt_add1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA (int sx, int yl, int yh) +void rt_add4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. 
-void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) +void rt_tlateadd4cols_rgba(int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) +void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. -void rt_shaded4cols_RGBA (int sx, int yl, int yh) +void rt_shaded4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA (int sx, int yl, int yh) +void rt_addclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_addclamp1col_rgba(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. 
-void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA (int sx, int yl, int yh) +void rt_subclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_subclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_subclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_subclamp4cols_rgba(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_revsubclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_revsubclamp4cols_rgba(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1816,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
-void R_DrawColumnHorizP_RGBA (void) +void R_DrawColumnHoriz_rgba (void) { if (dc_count <= 0) return; @@ -1831,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA (void) +void R_FillColumnHoriz_rgba (void) { if (dc_count <= 0) return; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1cde16071..1a08d1793 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -477,7 +477,7 @@ void R_MapTiltedPlane_C (int y, int x1) #endif } -void R_MapTiltedPlane_RGBA (int y, int x1) +void R_MapTiltedPlane_rgba (int y, int x1) { int x2 = spanend[y]; @@ -509,7 +509,7 @@ void R_MapColoredPlane_C (int y, int x1) memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); } -void R_MapColoredPlane_RGBA(int y, int x1) +void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); diff --git a/src/r_plane.h b/src/r_plane.h index 7505ac995..b199d3477 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -97,9 +97,9 @@ extern void(*R_MapColoredPlane)(int y, int x1); extern void(*R_MapTiltedPlane)(int y, int x1); void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_RGBA(int y, int x); +void R_MapTiltedPlane_rgba(int y, int x); void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_RGBA(int y, int x1); +void R_MapColoredPlane_rgba(int y, int x1); visplane_t *R_FindPlane ( const secplane_t &height, diff --git a/src/r_things.cpp b/src/r_things.cpp index f1f29f160..0858dce2f 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2676,7 +2676,7 @@ void R_DrawParticle_C (vissprite_t *vis) } } -void R_DrawParticle_RGBA(vissprite_t *vis) +void R_DrawParticle_rgba(vissprite_t *vis) { int spacing; uint32_t *dest; diff --git a/src/r_things.h b/src/r_things.h index 785729b09..f5cd30e00 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -98,7 +98,7 @@ struct particle_t; extern void(*R_DrawParticle)(vissprite_t *); void 
R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_RGBA (vissprite_t *); +void R_DrawParticle_rgba (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); From 9c8c1e0ea51534d8c6d5fba8ed0c58a965aa88e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 00:27:08 +0200 Subject: [PATCH 44/94] Fixed window transparency bug --- src/r_drawt_rgba.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 8f6d2ca13..d2d715c8d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -588,6 +588,8 @@ class RtAdd4colsRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_colormap; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -601,6 +603,8 @@ public: dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; dc_colormap = ::dc_colormap; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; } #ifdef NO_SSE @@ -722,10 +726,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); From 77c4786b9d716ab018ec4b082490b6ed78f5cc36 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 23:05:20 +0200 Subject: [PATCH 45/94] Minor code cleanup --- src/f_wipe.cpp | 5 +- src/r_draw.cpp | 57 +++-------- src/r_draw.h | 19 +++- src/r_draw_rgba.h | 213 +++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 32 +++---- src/r_main.h | 223 ------------------------------------------- src/r_plane.cpp 
| 28 +++--- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 5 +- src/v_draw.cpp | 1 + 10 files changed, 277 insertions(+), 308 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 84b6036e4..aa9038eeb 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -78,7 +78,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,8 +271,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to; - BYTE *fromold, *fromnew; + BYTE *to, *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ecb4441f8..4dcdc3e6b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -63,7 +63,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE* dc_destorg; +BYTE *dc_destorg; } int scaledviewwidth; @@ -276,7 +276,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. 
- *dest = colormap[source[frac >> FRACBITS]]; + *dest = colormap[source[frac>>FRACBITS]]; dest += pitch; frac += fracstep; @@ -321,13 +321,12 @@ void R_FillAddColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -348,13 +347,12 @@ void R_FillAddClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -381,13 +379,12 @@ void R_FillSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; + int pitch = dc_pitch; do { @@ -413,13 +410,12 @@ void R_FillRevSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -672,13 +668,14 @@ void R_DrawTranslatedColumnP_C (void) { *dest = colormap[translation[source[frac>>FRACBITS]]]; dest += pitch; + frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C() +void R_DrawTlatedAddColumnP_C (void) { int count; BYTE *dest; @@ -772,15 +769,15 @@ void R_DrawAddClampColumnP_C () frac = dc_texturefrac; { - const BYTE *source = dc_source; BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { - DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; DWORD b = a; a |= 0x01f07c1f; @@ -788,7 +785,7 @@ void R_DrawAddClampColumnP_C () a &= 0x3fffffff; b = b - (b >> 5); a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB32k.All[a & (a>>15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1190,9 +1187,6 @@ void R_DrawSpanP_C (void) } while (--count); } } 
-#endif - -#ifndef X86_ASM // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) @@ -1282,8 +1276,6 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; - uint32_t light = calc_light_multiplier(ds_light); - if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1334,8 +1326,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1426,7 +1416,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1436,7 +1425,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); @@ -1449,7 +1437,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1459,14 +1446,12 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); } } - void R_DrawSpanMaskedAddClampP_C (void) { dsfixed_t xfrac; @@ -1481,8 +1466,6 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1552,7 +1535,7 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan_C (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); } @@ -1759,7 +1742,7 @@ DWORD vlinec1 () do { - *dest = colormap[source[frac >> bits]]; + *dest = colormap[source[frac>>bits]]; frac += fracstep; dest += pitch; } while 
(--count); @@ -1830,9 +1813,7 @@ DWORD mvlinec1 () return frac; } -#endif -#if !defined(X86_ASM) void mvlinec4 () { BYTE *dest = dc_dest; @@ -1843,6 +1824,7 @@ void mvlinec4 () do { BYTE pix; + pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; @@ -1879,7 +1861,6 @@ static void R_DrawFogBoundaryLine (int y, int x) int x2 = spanend[y]; BYTE *colormap = dc_colormap; BYTE *dest = ylookup[y] + dc_destorg; - do { dest[x] = colormap[dest[x]]; @@ -1996,8 +1977,6 @@ fixed_t tmvline1_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; @@ -2024,12 +2003,6 @@ void tmvline4_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - do { for (int i = 0; i < 4; ++i) @@ -2062,8 +2035,6 @@ fixed_t tmvline1_addclamp_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; diff --git a/src/r_draw.h b/src/r_draw.h index cea05e469..a31183405 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -32,7 +32,20 @@ extern "C" int fuzzpos; extern "C" int fuzzviewheight; struct FColormap; -struct ShadeConstants; + +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; extern "C" int 
ylookup[MAXHEIGHT]; @@ -58,7 +71,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE* dc_dest, *dc_destorg; +extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -68,7 +81,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 9f07ff0bf..47ea75260 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -24,6 +24,7 @@ #define __R_DRAW_RGBA__ #include "r_draw.h" +#include "v_palette.h" #include #include #include @@ -273,4 +274,216 @@ public: void Execute(DrawerThread *thread) override; }; +///////////////////////////////////////////////////////////////////////////// +// Pixel shading macros and inline functions: + +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) inline +#else +#define FORCEINLINE inline +#endif +#endif + +// calculates the light constant passed to the shade_pal_index function +FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra_simple(uint32_t 
color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 
+ blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo 
= _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, 
intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ + \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index c829c2dc4..837093044 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -340,13 +340,13 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -374,14 +374,13 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -434,6 +433,7 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_c (int hx, int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -445,14 +445,12 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -466,6 +464,7 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_c (int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -477,14 +476,12 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val; @@ -523,14 +520,13 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -639,13 +635,13 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -674,13 +670,13 @@ void rt_subclamp4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = 
ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; diff --git a/src/r_main.h b/src/r_main.h index d71d44fe1..fa8fe0bb1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,229 +90,6 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) -struct ShadeConstants -{ - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; -}; - -// calculates the light constant passed to the shade_pal_index function -inline uint32_t calc_light_multiplier(dsfixed_t light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 
0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * 
inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = (fg >> 24) & 0xff; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculate constants for a simple shade -#define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; - -// Calculate constants for a simple shade with different light levels for each pixel -#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); - -// Simple shade 4 pixels -#define SSE_SHADE_SIMPLE(fg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ - fg_hi = _mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = 
_mm_packus_epi16(fg_lo, fg_hi); \ -} - -// Calculate constants for a complex shade -#define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Calculate constants for a complex shade with different light levels for each pixel -#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), 
mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Complex shade 4 pixels -#define SSE_SHADE(fg, shade_constants) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - \ - __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ - intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ - \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ - \ - __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ - intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ - \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - extern bool r_swtruecolor; extern double GlobVis; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1a08d1793..807066f77 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,6 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_draw_rgba.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -506,7 +507,7 @@ void R_MapTiltedPlane_rgba (int y, int x1) void R_MapColoredPlane_C (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); + memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } void R_MapColoredPlane_rgba(int y, int x1) @@ -1710,7 +1711,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1745,7 +1746,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI * 3 / 2 - ViewAngle.Radians(); + ang = M_PI*3/2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1756,25 +1757,25 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; - // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); - // VectorScale2 (m, 64.f/VectorLength(m)); +// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); +// VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI / 2 - n[0] = -xscale * cos(ang); + ang += M_PI/2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif - // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); - // VectorScale2 (n, 64.f/VectorLength(n)); +// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); +// VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1807,7 +1808,6 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a if (pl->height.fC() > 0) planelightfloat = -planelightfloat; - ds_light = 0; if (fixedlightlev >= 0) { R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index fbbd65b17..c4347236d 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -96,7 +96,7 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) else if (cache != 0) { if (r_swtruecolor) - tex->GetPixels(); + tex->GetPixelsBgra(); else tex->GetPixels (); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0858dce2f..836f58690 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2612,8 +2612,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { + DWORD *bg2rgb; int spacing; BYTE *dest; + DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2622,9 +2624,6 @@ void R_DrawParticle_C (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); - DWORD *bg2rgb; - DWORD fg; - // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 02ba591b6..6a8dad047 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -44,6 +44,7 @@ #include "r_utility.h" #ifndef NO_SWRENDER #include "r_draw.h" +#include "r_draw_rgba.h" #include "r_main.h" #include "r_things.h" #endif From 312776621e194e36f7ef1b01d36942929ff241bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 16 Jun 2016 06:47:30 +0200 Subject: [PATCH 46/94] Added DrawerContext class 
--- src/CMakeLists.txt | 1 + src/r_bsp.cpp | 4 +- src/r_draw.cpp | 798 ++++++++++++++++++++++++++++++++++++++- src/r_draw.h | 89 ++++- src/r_draw_rgba.cpp | 169 +++++++++ src/r_draw_rgba.h | 7 + src/r_drawer_context.cpp | 464 +++++++++++++++++++++++ src/r_drawer_context.h | 123 ++++++ src/r_drawt.cpp | 22 +- src/r_drawt_rgba.cpp | 2 + src/r_main.cpp | 14 +- src/r_main.h | 16 - src/r_plane.cpp | 454 ++-------------------- src/r_plane.h | 8 - src/r_segs.cpp | 541 +++++--------------------- src/r_swrenderer.cpp | 1 + src/r_things.cpp | 382 +++++-------------- src/r_things.h | 9 +- src/v_draw.cpp | 109 +++--- 19 files changed, 1922 insertions(+), 1291 deletions(-) create mode 100644 src/r_drawer_context.cpp create mode 100644 src/r_drawer_context.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea0..49152b785 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -892,6 +892,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_3dfloors.cpp r_bsp.cpp + r_drawer_context.cpp r_draw.cpp r_draw_rgba.cpp r_drawt.cpp diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 934d2d3e5..2b94b1e95 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -41,7 +41,7 @@ #include "r_local.h" #include "r_main.h" #include "r_plane.h" -#include "r_draw.h" +#include "r_drawer_context.h" #include "r_things.h" #include "r_3dfloors.h" #include "a_sharedglobal.h" @@ -545,7 +545,7 @@ void R_AddLine (seg_t *line) curline = line; // [RH] Color if not texturing line - dc_color = (((int)(line - segs) * 8) + 4) & 255; + DrawerContext::SetFlatColor((((int)(line - segs) * 8) + 4) & 255); pt1 = line->v1->fPos() - ViewPos; pt2 = line->v2->fPos() - ViewPos; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6b..19195e907 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -40,6 +42,8 @@ #include "r_data/colormaps.h" #include 
"r_plane.h" #include "r_draw_rgba.h" +#include "r_drawer_context.h" +#include "d_net.h" #include "gi.h" #include "stats.h" @@ -66,6 +70,7 @@ int ylookup[MAXHEIGHT]; BYTE *dc_destorg; } int scaledviewwidth; +DCanvas *dc_canvas; // [RH] Pointers to the different column drawers. // These get changed depending on the current @@ -97,9 +102,9 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); -void (*R_MapTiltedPlane)(int y, int x1); -void (*R_MapColoredPlane)(int y, int x1); -void (*R_DrawParticle)(vissprite_t *); +void (*R_DrawTiltedSpan)(int y, int x1, int x2); +void (*R_DrawColoredSpan)(int y, int x1, int x2); +void (*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -134,6 +139,24 @@ void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); void (*rt_span_coverage)(int x, int start, int stop); +void (*colfunc) (void); +void (*basecolfunc) (void); +void (*fuzzcolfunc) (void); +void (*transcolfunc) (void); +void (*spanfunc) (void); +void (*hcolfunc_pre) (void); +void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post4) (int sx, int yl, int yh); + +extern "C" void R_DrawTiltedPlane_ASM(int y, int x1); +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM(const BYTE *flat); +extern "C" void R_SetSpanSize_ASM(int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM(BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif // // R_DrawColumn @@ -1040,13 +1063,6 @@ const BYTE* ds_source; // just for profiling int dscount; - -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const 
BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif } //========================================================================== @@ -1076,9 +1092,8 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(FDynamicColormap *colormap, int shade) +void R_SetSpanColormap() { - R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2297,9 +2312,9 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHoriz_rgba; R_DrawFogBoundary = R_DrawFogBoundary_rgba; - R_MapTiltedPlane = R_MapTiltedPlane_rgba; - R_MapColoredPlane = R_MapColoredPlane_rgba; - R_DrawParticle = R_DrawParticle_rgba; + R_DrawTiltedSpan = R_DrawTiltedSpan_rgba; + R_DrawColoredSpan = R_DrawColoredSpan_rgba; + R_FillTransColumn = R_FillTransColumn_rgba; tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; @@ -2394,9 +2409,15 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapTiltedPlane_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawColoredSpan = R_DrawColoredSpan_C; + R_FillTransColumn = R_FillTransColumn_C; + +#ifdef X86_ASM + // To do: update R_DrawTiltedPlane_ASM to use x2 rather than spanend[y] + R_DrawTiltedSpan = [](int y, int x1, int x2) { R_DrawTiltedPlane_ASM(y, x1); }; +#else + R_DrawTiltedSpan = R_DrawTiltedSpan_C; +#endif tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; @@ -2829,3 +2850,744 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } } + 
+///////////////////////////////////////////////////////////////////////////// + +FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +bool ds_plane_shade; +float ds_planelightfloat; +fixed_t ds_pviewx, ds_pviewy; +int ds_planeshade; +extern "C" BYTE *tiltlighting[MAXWIDTH]; + +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +#ifdef _MSC_VER +#pragma warning(disable:4244) // warning C4244: conversion from 'SQWORD' to 'DWORD', possible loss of data +#endif + +//========================================================================== +// +// R_CalcTiltedLighting +// +// Calculates the lighting for one row of a tilted plane. If the definition +// of GETPALOOKUP changes, this needs to change, too. +// +//========================================================================== + +extern "C" { +void R_CalcTiltedLighting (double lval, double lend, int width) +{ + double lstep; + BYTE *lightfiller; + BYTE *basecolormapdata = ds_fcolormap->Maps; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lend, ds_planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(ds_planeshade) - lval; + lend = FIXED2DBL(ds_planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) 
+ { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS-1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS-1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } +} +} // extern "C" + +void R_DrawTiltedSpan_C (int y, int x1, int x2) +{ + int width = x2 - x1; + double iz, uz, vz; + BYTE *fb; + DWORD u, v; + int i; + + iz = ds_plane_sz[2] + ds_plane_sz[1]*(centery-y) + ds_plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (ds_plane_shade) + { + uz = (iz + ds_plane_sz[0]*width) * ds_planelightfloat; + vz = iz * ds_planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = ds_plane_su[2] + ds_plane_su[1]*(centery-y) + ds_plane_su[0]*(x1-centerx); + vz = ds_plane_sv[2] + ds_plane_sv[1]*(centery-y) + ds_plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. 
+ // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + ds_pviewx; + v = SQWORD(vz*z) + ds_pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += ds_plane_sz[0]; + uz += ds_plane_su[0]; + vz += ds_plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = ds_plane_sz[0] * SPANSIZE; + uzstep = ds_plane_su[0] * SPANSIZE; + vzstep = ds_plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += ds_plane_sz[0] * left; + uz += ds_plane_su[0] * left; + vz += ds_plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) 
| ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + +void R_DrawColoredSpan_C (int y, int x1, int x2) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, x2 - x1 + 1); +} + +///////////////////////////////////////////////////////////////////////////// + +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = uv_start; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; + } + + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, 
uv_start, uv_step, uv_max, source, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + if (r_swtruecolor) + { + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = 
LIGHTSCALE(lights[i], wallshade); + } + else + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; + } + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +///////////////////////////////////////////////////////////////////////////// + +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha) +{ + fixed_t fglevel, bglevel; + DWORD *fg2rgb; + DWORD *bg2rgb; + int spacing; + BYTE *dest; + DWORD fg; + + fglevel = ((alpha + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT - fglevel; + fg2rgb = Col2RGB8[fglevel >> 10]; + bg2rgb = Col2RGB8[bglevel >> 10]; + fg = fg2rgb[color]; + + spacing = dc_pitch; + + int ycount = y2 - y1 + 1; + dest = ylookup[y1] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + DWORD bg = bg2rgb[*dest]; + bg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg >> 15)]; + dest += spacing; + } +} + +///////////////////////////////////////////////////////////////////////////// + +// +// R_DrawMaskedColumn +// Used for sprites and masked mid textures. +// Masked means: partly transparent, i.e. 
stored +// in posts/runs of opaque pixels. +// +short* dc_mfloorclip; +short* dc_mceilingclip; + +double dc_spryscale; +double dc_sprtopscreen; + +bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *span) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + const fixed_t centeryfrac = FLOAT2FIXED(CenterY); + const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + fixed_t texturefrac = dc_texturefrac; + fixed_t iscale = dc_iscale; + + // calculate unclipped screen coordinates for post + int yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + int yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length)) - 1; + + if (dc_sprflipvert) + { + swapvalues(yl, yh); + } + + if (yh >= dc_mfloorclip[x]) + { + yh = dc_mfloorclip[x] - 1; + } + if (yl < dc_mceilingclip[x]) + { + yl = dc_mceilingclip[x]; + } + + if (yl <= yh) + { + if (dc_sprflipvert) + { + texturefrac = (yl*iscale) - (top << FRACBITS) + - FixedMul(centeryfrac, iscale) - texturemid; + const fixed_t maxfrac = length << FRACBITS; + while (texturefrac >= maxfrac) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + while (endfrac < 0) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + else + { + texturefrac = texturemid - (top << FRACBITS) + + (yl*iscale) - FixedMul(centeryfrac - FRACUNIT, iscale); + while (texturefrac < 0) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + const fixed_t maxfrac = length << FRACBITS; + if (yh < dc_mfloorclip[x] - 1 && endfrac < maxfrac - iscale) + { + yh++; + } + else while (endfrac >= maxfrac) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + + dc_yl = yl; + dc_yh = yh; + dc_x = x; + dc_texturefrac = texturefrac; + dc_iscale = iscale; + dc_source = 
column + top; + dc_count = yh - yl + 1; + dc_dest = (ylookup[yl] + x) * pixelsize + dc_destorg; + colfunc(); + } + nextpost: + span++; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index a31183405..c22c958d0 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,11 @@ #include "r_defs.h" +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! +#ifdef DRAWER_INTERNALS + // Spectre/Invisibility. #define FUZZTABLE 50 extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine @@ -71,6 +76,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern DCanvas *dc_canvas; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -86,6 +92,23 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; +// +// Function pointers to switch refresh/drawing functions. +// Used to select shadow mode etc. +// +extern void (*colfunc) (void); +extern void (*basecolfunc) (void); +extern void (*fuzzcolfunc) (void); +extern void (*transcolfunc) (void); +// No shadow effects on floors. +extern void (*spanfunc) (void); + +// [RH] Function pointers for the horizontal column drawers. +extern void (*hcolfunc_pre) (void); +extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post4) (int sx, int yl, int yh); + // [RH] Pointers to the different column and span drawers... // The span blitting interface. @@ -116,7 +139,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. 
extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(FDynamicColormap *colormap, int shade); +void R_SetSpanColormap(); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -281,6 +304,15 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); +// vars for R_DrawMaskedColumn +extern short* dc_mfloorclip; +extern short* dc_mceilingclip; +extern double dc_spryscale; +extern double dc_sprtopscreen; +extern bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -325,12 +357,7 @@ void R_InitShadeMaps(); void R_InitFuzzTable (int fuzzoff); // [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - DoDraw1, // draw this as if r_columnmethod is 1 -}; +enum ESPSResult; ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) { @@ -353,18 +380,6 @@ extern void(*tmvline4_revsubclamp)(); // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); @@ -373,4 +388,40 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Retrieve column data for wallscan. Should probably be removed +// to just use the texture's GetColumn() method. It just exists +// for double-layer skies. +const BYTE *R_GetColumn (FTexture *tex, int col); +void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
+void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// transmaskwallscan is like maskwallscan, but it can also blend to the background +void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +extern void(*R_DrawColoredSpan)(int y, int x1, int x2); +extern void(*R_DrawTiltedSpan)(int y, int x1, int x2); + +void R_DrawTiltedSpan_C(int y, int x1, int x2); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2); +void R_DrawColoredSpan_C(int y, int x1, int x2); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + +extern FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +extern bool ds_plane_shade; +extern float ds_planelightfloat; +extern fixed_t ds_pviewx, ds_pviewy; +extern int ds_planeshade; + +extern "C" BYTE *tiltlighting[MAXWIDTH]; +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +struct vissprite_t; +extern void(*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int alpha); + +#endif + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b4..420b63dff 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -21,6 +21,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -3492,6 +3494,158 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + const BYTE *ds_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + _y 
= y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_source = ::ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)ds_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *dc_destorg; + int dc_pitch; + fixed_t ds_light; + int ds_color; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = GPalette.BaseColors[color].d; 
+ uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a + 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); @@ -3968,3 +4122,18 @@ void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) R_DrawFogBoundarySection_rgba(t2, b2, x1); } } + +void R_DrawTiltedSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int a) +{ + DrawerCommandQueue::QueueCommand(x, y1, y2, color, a); +} diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260..a91b54d74 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -23,6 +23,11 @@ #ifndef __R_DRAW_RGBA__ #define __R_DRAW_RGBA__ +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! 
+#ifdef DRAWER_INTERNALS + #include "r_draw.h" #include "v_palette.h" #include @@ -487,3 +492,5 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } #endif + +#endif diff --git a/src/r_drawer_context.cpp b/src/r_drawer_context.cpp new file mode 100644 index 000000000..3533a3e4f --- /dev/null +++ b/src/r_drawer_context.cpp @@ -0,0 +1,464 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// The actual span/column drawing functions. +// Here find the main potential for optimization, +// e.g. inline assembly, different algorithms. 
+// +//----------------------------------------------------------------------------- + +#define DRAWER_INTERNALS + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw_rgba.h" +#include "d_net.h" +#include "r_drawer_context.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" + +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM (const BYTE *flat); +extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif + +DCanvas *DrawerContext::Canvas() +{ + return dc_canvas; +} + +uint8_t DrawerContext::FlatColor() +{ + return dc_color; +} + +FColormap *DrawerContext::LightColormap() +{ + return dc_fcolormap; +} + +fixed_t DrawerContext::TextureFrac() +{ + return dc_texturefrac; +} + +fixed_t DrawerContext::TextureStep() +{ + return dc_iscale; +} + +double DrawerContext::TextureMid() +{ + return dc_texturemid; +} + +int DrawerContext::SpanXBits() +{ + return ds_xbits; +} + +int DrawerContext::SpanYBits() +{ + return ds_ybits; +} + +lighttable_t *DrawerContext::SpanLitColormap() +{ + return ds_colormap; +} + +bool DrawerContext::IsFuzzColumn() +{ + return colfunc == fuzzcolfunc; +} + +bool DrawerContext::IsFillColumn() +{ + return colfunc == R_FillColumn; +} + +bool DrawerContext::IsBaseColumn() +{ + return colfunc == basecolfunc; +} + +void DrawerContext::SetDest(int x, int y) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; +} + +void DrawerContext::SetFlatColor(uint8_t color_index) +{ + dc_color = color_index; +} + +void DrawerContext::SetLight(FColormap *base_colormap, float light, int shade) +{ + R_SetColorMapLight(base_colormap, light, shade); +} + +void DrawerContext::SetX(int x) +{ + dc_x = x; +} + +void DrawerContext::SetY1(int y) +{ + dc_yl = y; +} + +void DrawerContext::SetY2(int y) +{ + dc_yh = y; +} + +void DrawerContext::SetSource(const BYTE *source) +{ + dc_source = source; +} + +void DrawerContext::SetTextureFrac(fixed_t pos) +{ + dc_texturefrac = pos; +} + +void DrawerContext::SetTextureStep(fixed_t step) +{ + dc_iscale = step; +} + +void DrawerContext::SetTextureMid(double value) +{ + dc_texturemid = value; +} + +void DrawerContext::SetDrawCount(int count) +{ + dc_count = count; +} + +void DrawerContext::SetSpanY(int y) +{ + ds_y = y; +} + +void DrawerContext::SetSpanX1(int x) +{ + ds_x1 = x; +} + +void DrawerContext::SetSpanX2(int x) +{ + ds_x2 = x; +} + +void DrawerContext::SetSpanXStep(dsfixed_t step) +{ + ds_xstep = step; +} + +void DrawerContext::SetSpanYStep(dsfixed_t step) +{ + ds_ystep = step; +} + +void DrawerContext::SetSpanXBits(int bits) +{ + ds_xbits = bits; +} + +void DrawerContext::SetSpanYBits(int bits) +{ + ds_ybits = bits; +} + +void DrawerContext::SetSpanXFrac(dsfixed_t frac) +{ + ds_xfrac = frac; +} + +void DrawerContext::SetSpanYFrac(dsfixed_t frac) +{ + ds_yfrac = frac; +} + +void DrawerContext::SetSpanLight(FColormap *base_colormap, float light, int shade) +{ + R_SetDSColorMapLight(base_colormap ? 
base_colormap : &identitycolormap, light, shade); + R_SetSpanColormap(); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, alpha, translation, color); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); +} + +void DrawerContext::FinishSetPatchStyle() +{ + R_FinishSetPatchStyle(); +} + +void DrawerContext::SetCanvas(DCanvas *canvas) +{ + dc_canvas = canvas; + dc_destorg = canvas->GetBuffer(); + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } +} + +void DrawerContext::SetTranslationMap(lighttable_t *translation) +{ + R_SetTranslationMap(translation ? translation : identitymap); +} + +void DrawerContext::SetSpanSource(FTexture *tex) +{ + R_SetupSpanBits(tex); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); + +#ifdef X86_ASM + if (!r_swtruecolor && ds_source != ds_cursource) + { + R_SetSpanSource_ASM (ds_source); + } + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + } +#endif +} + +void DrawerContext::SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + ds_plane_sz = plane_sz; + ds_plane_su = plane_su; + ds_plane_sv = plane_sv; + ds_plane_shade = plane_shade; + ds_planelightfloat = planelightfloat; + ds_pviewx = pviewx; + ds_pviewy = pviewy; + + if (!plane_shade) + { + for (int i = 0; i < viewwidth; ++i) + { + tiltlighting[i] = DrawerContext::SpanLitColormap(); + } + } +} + +void DrawerContext::SetSlabLight(const BYTE *colormap) +{ + R_SetupDrawSlab(colormap); +} + +void DrawerContext::DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int 
dest_y) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + R_DrawSlab(dx, v, dy, vi, source, (ylookup[dest_y] + dest_x) * pixelsize + dc_destorg); +} + +void DrawerContext::SetSpanStyle(fixed_t alpha, bool additive, bool masked) +{ + if (spanfunc != R_FillSpan) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpan; + } + } + } +} + +void DrawerContext::RtInitCols(BYTE *buffer) +{ + rt_initcols(buffer); +} + +void DrawerContext::RtSpanCoverage(int x, int start, int stop) +{ + rt_span_coverage(x, start, stop); +} + +void DrawerContext::SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert) +{ + dc_mfloorclip = mfloorclip; + dc_mceilingclip = mceilingclip; + dc_spryscale = spryscale; + dc_sprtopscreen = sprtopscreen; + dc_sprflipvert = sprflipvert; +} + +void DrawerContext::DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans) +{ + R_DrawMaskedColumn(x, column, spans); +} + +void 
DrawerContext::DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans) +{ + dc_x = x; + R_DrawMaskedColumnHoriz(column, spans); +} + +void DrawerContext::DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) +{ + R_DrawFogBoundary(x1, x2, uclip, dclip); +} + +void DrawerContext::DrawRt4cols(int sx) +{ + rt_draw4cols(sx); +} + +void DrawerContext::DrawColumn() +{ + colfunc(); +} + +void DrawerContext::DrawSpan() +{ + spanfunc(); +} + +void DrawerContext::DrawHColumnPre() +{ + hcolfunc_pre(); +} + +void DrawerContext::DrawSimplePolySpan() +{ + R_DrawSpan(); +} + +void DrawerContext::SetBaseStyle() +{ + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; +} + +void DrawerContext::DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? 
getcol : R_GetColumn); +} + +void DrawerContext::DrawColoredSpan(int y, int x1, int x2) +{ + R_DrawColoredSpan(y, x1, x2); +} + +void DrawerContext::DrawTiltedSpan(int y, int x1, int x2) +{ + R_DrawTiltedSpan(y, x1, x2); +} + +void DrawerContext::FillTransColumn(int x, int y1, int y2, int color, int alpha) +{ + R_FillTransColumn(x, y1, y2, color, alpha); +} diff --git a/src/r_drawer_context.h b/src/r_drawer_context.h new file mode 100644 index 000000000..64e0bf6a0 --- /dev/null +++ b/src/r_drawer_context.h @@ -0,0 +1,123 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAWER_CONTEXT__ +#define __R_DRAWER_CONTEXT__ + +#include "r_defs.h" + +// [RH] Consolidate column drawer selection +enum ESPSResult +{ + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 +}; + +struct TiltedPlaneData; + +// Immediate graphics context for column/span based software rendering. 
+class DrawerContext +{ +public: + static DCanvas *Canvas(); // dc_destorg + + static uint8_t FlatColor(); // dc_color + static FColormap *LightColormap(); // dc_fcolormap + static fixed_t TextureFrac(); // dc_texturefrac + static fixed_t TextureStep(); // dc_iscale + static double TextureMid(); // dc_texturemid + + static int SpanXBits(); // ds_xbits + static int SpanYBits(); // ds_ybits + static lighttable_t *SpanLitColormap(); // ds_colormap + + static bool IsFuzzColumn(); // colfunc == fuzzcolfunc + static bool IsFillColumn(); // colfunc == R_FillColumn + static bool IsBaseColumn(); // colfunc == basecolfunc + + static void SetCanvas(DCanvas *canvas); // dc_destorg + + static void SetFlatColor(uint8_t color_index); // dc_color + static void SetLight(FColormap *base_colormap, float light, int shade); + static void SetTranslationMap(lighttable_t *translation); + static void SetX(int x); // dc_x + static void SetY1(int y); // dc_yl + static void SetY2(int y); // dc_yh + static void SetSource(const BYTE *source); // dc_source + static void SetTextureFrac(fixed_t pos); // dc_texturefrac + static void SetTextureStep(fixed_t step); // dc_iscale + static void SetTextureMid(double value); // dc_texturemid + static void SetDest(int x, int y); // dc_dest + static void SetDrawCount(int count); // dc_count + + static void SetSpanY(int y); // ds_y + static void SetSpanX1(int x); // ds_x1 + static void SetSpanX2(int x); // ds_x2 + static void SetSpanXStep(dsfixed_t step); // ds_xstep + static void SetSpanYStep(dsfixed_t step); // ds_ystep + static void SetSpanXBits(int bits); // ds_xbits + static void SetSpanYBits(int bits); // ds_ybits + static void SetSpanXFrac(dsfixed_t frac); // ds_xfrac + static void SetSpanYFrac(dsfixed_t frac); // ds_yfrac + static void SetSpanLight(FColormap *base_colormap, float light, int shade); + static void SetSpanSource(FTexture *texture); + static void SetSpanStyle(fixed_t alpha, bool additive, bool masked); + + static void SetSlabLight(const 
BYTE *colormap); + + static ESPSResult SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color); + static ESPSResult SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color); + // Call this after finished drawing the current thing, in case its style was STYLE_Shade + static void SetBaseStyle(); + static void FinishSetPatchStyle(); + + static void SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert); + static void SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + + static void RtInitCols(BYTE *buffer); + static void RtSpanCoverage(int x, int start, int stop); + + static void DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + static void DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans); + + static void DrawRt4cols(int sx); + static void DrawColumn(); + static void DrawHColumnPre(); + static void DrawSpan(); + static void DrawSimplePolySpan(); + + static void DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + + static void DrawColoredSpan(int y, int x1, int x2); + static void DrawTiltedSpan(int y, int x1, int x2); + + static void DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int dest_y); + + static void 
FillTransColumn(int x, int y1, int y2, int color, int alpha); + + static void DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); +}; + +#endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 837093044..a74dc0133 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -38,6 +38,8 @@ ** Let's hope so. :-) */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" @@ -1128,26 +1130,26 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) const int top = span->TopOffset; // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length) - 1); + dc_yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + dc_yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length) - 1); - if (sprflipvert) + if (dc_sprflipvert) { swapvalues (dc_yl, dc_yh); } - if (dc_yh >= mfloorclip[dc_x]) + if (dc_yh >= dc_mfloorclip[dc_x]) { - dc_yh = mfloorclip[dc_x] - 1; + dc_yh = dc_mfloorclip[dc_x] - 1; } - if (dc_yl < mceilingclip[dc_x]) + if (dc_yl < dc_mceilingclip[dc_x]) { - dc_yl = mceilingclip[dc_x]; + dc_yl = dc_mceilingclip[dc_x]; } if (dc_yl <= dc_yh) { - if (sprflipvert) + if (dc_sprflipvert) { dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - fixed_t(CenterY * dc_iscale) - texturemid; @@ -1178,7 +1180,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) + if (dc_yh < dc_mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) { dc_yh++; } @@ -1198,7 +1200,7 @@ nextpost: span++; } - if (sprflipvert) + if (dc_sprflipvert) { unsigned int *front = horizspan[dc_x&3]; unsigned int *back = dc_ctspan[dc_x&3] - 2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8d..4b6605b4a 
100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -34,6 +34,8 @@ ** Please see r_drawt.cpp for a description of the globals used. */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" diff --git a/src/r_main.cpp b/src/r_main.cpp index 247a98125..a30aa232b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + // HEADER FILES ------------------------------------------------------------ #include @@ -150,17 +152,6 @@ angle_t xtoviewangle[MAXWIDTH+1]; bool foggy; // [RH] ignore extralight and fullbright? int r_actualextralight; -void (*colfunc) (void); -void (*basecolfunc) (void); -void (*fuzzcolfunc) (void); -void (*transcolfunc) (void); -void (*spanfunc) (void); - -void (*hcolfunc_pre) (void); -void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post4) (int sx, int yl, int yh); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -820,6 +811,7 @@ void R_SetupBuffer () ASM_PatchPitch (); #endif } + dc_canvas = RenderTarget; dc_destorg = lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) { diff --git a/src/r_main.h b/src/r_main.h index fa8fe0bb1..91eb5b183 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -110,22 +110,6 @@ extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; -// -// Function pointers to switch refresh/drawing functions. -// Used to select shadow mode etc. -// -extern void (*colfunc) (void); -extern void (*basecolfunc) (void); -extern void (*fuzzcolfunc) (void); -extern void (*transcolfunc) (void); -// No shadow effects on floors. -extern void (*spanfunc) (void); - -// [RH] Function pointers for the horizontal column drawers. 
-extern void (*hcolfunc_pre) (void); -extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post4) (int sx, int yl, int yh); void R_InitTextureMapping (); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77..c4d7cd59c 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,7 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -138,8 +138,6 @@ FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -147,13 +145,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -220,304 +211,22 @@ void R_MapPlane (int y, int x1) distance = planeheight * yslope[y]; - ds_xstep = xs_ToFixed(32-ds_xbits, distance * xstepscale); - ds_ystep = xs_ToFixed(32-ds_ybits, distance * ystepscale); - ds_xfrac = xs_ToFixed(32-ds_xbits, distance * basexfrac) + pviewx; - ds_yfrac = xs_ToFixed(32-ds_ybits, distance * baseyfrac) + pviewy; + DrawerContext::SetSpanXStep(xs_ToFixed(32 - DrawerContext::SpanXBits(), distance * xstepscale)); + DrawerContext::SetSpanYStep(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * ystepscale)); + DrawerContext::SetSpanXFrac(xs_ToFixed(32 - 
DrawerContext::SpanXBits(), distance * basexfrac) + pviewx); + DrawerContext::SetSpanYFrac(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * baseyfrac) + pviewy); if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); + DrawerContext::SetSpanLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } -#ifdef X86_ASM - if (!r_swtruecolor && ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM (ds_colormap); -#endif + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2); - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - - spanfunc (); -} - -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". 
- lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) - { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - -//========================================================================== -// -// R_MapTiltedPlane -// -//========================================================================== - -void R_MapTiltedPlane_C (int y, int x1) -{ - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); - } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif -} - -void R_MapTiltedPlane_rgba (int y, int x1) -{ - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } -} - -//========================================================================== -// -// R_MapColoredPlane -// -//========================================================================== - -void R_MapColoredPlane_C (int y, int x1) -{ - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); -} - -void R_MapColoredPlane_rgba(int y, int x1) -{ - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + DrawerContext::DrawSpan(); } //========================================================================== @@ -1014,7 +723,7 @@ static void R_DrawSky (visplane_t *pl) rw_offset = 0; frontyScale = rw_pic->Scale.Y; - dc_texturemid = skymid * frontyScale; + 
DrawerContext::SetTextureMid(skymid * frontyScale); if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { // The texture tiles nicely @@ -1023,8 +732,8 @@ static void R_DrawSky (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + frontyScale, rw_pic, rw_offset, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -1047,7 +756,7 @@ static void R_DrawSkyStriped (visplane_t *pl) if (topfrac < 0) topfrac += frontskytex->GetHeight(); yl = 0; yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - dc_texturemid = topfrac - iscale * (1 - CenterY); + DrawerContext::SetTextureMid(topfrac - iscale * (1 - CenterY)); while (yl < viewheight) { @@ -1061,11 +770,11 @@ static void R_DrawSkyStriped (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, - backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, + rw_pic, rw_offset, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; - dc_texturemid = iscale * (centery-yl-1); + DrawerContext::SetTextureMid(iscale * (centery-yl-1)); } } @@ -1086,7 +795,7 @@ int R_DrawPlanes () int i; int vpcount = 0; - ds_color = 3; + DrawerContext::SetFlatColor(3); for (i = 0; i < MAXVISPLANES; i++) { @@ -1111,7 +820,7 @@ void R_DrawHeightPlanes(double height) visplane_t *pl; int i; - ds_color = 3; + DrawerContext::SetFlatColor(3); DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; @@ -1151,8 +860,8 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske if (r_drawflat) { // [RH] no texture mapping - ds_color += 4; - R_MapVisPlane (pl, R_MapColoredPlane); + DrawerContext::SetFlatColor(DrawerContext::FlatColor() + 4); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawColoredSpan(y, x1, spanend[y]); }); } else if (pl->picnum == skyflatnum) { // sky flat @@ -1175,13 +884,9 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske { // Don't waste time on a masked texture if it isn't really masked. 
masked = false; } - R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + DrawerContext::SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); @@ -1544,13 +1249,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } else { fakefixed = true; fixedcolormap = &NormalLight; - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1567,13 +1272,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (!r_swtruecolor && ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1583,8 +1281,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - ds_xbits, _xscale); - yscale = xs_ToFixed(32 - ds_ybits, _yscale); + xscale = xs_ToFixed(32 - DrawerContext::SpanXBits(), _xscale); + yscale = xs_ToFixed(32 - DrawerContext::SpanYBits(), _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); @@ -1631,15 +1329,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; - ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - 
R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else @@ -1647,61 +1344,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t plane_shade = true; } - if (spanfunc != R_FillSpan) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpan; - } - } - } + DrawerContext::SetSpanStyle(alpha, additive, masked); + R_MapVisPlane (pl, R_MapPlane); } @@ -1733,14 +1377,14 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t return; } - lxscale = _xscale * ifloatpow2[ds_xbits]; - lyscale = _yscale * ifloatpow2[ds_ybits]; + lxscale = _xscale * ifloatpow2[DrawerContext::SpanXBits()]; + lyscale = _yscale * ifloatpow2[DrawerContext::SpanYBits()]; xscale = 64.f / lxscale; yscale = 64.f / lyscale; zeroheight = pl->height.ZatPoint(ViewPos); - pviewx = xs_ToFixed(32 - ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); + pviewx = 
xs_ToFixed(32 - DrawerContext::SpanXBits(), pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - DrawerContext::SpanYBits(), pl->xform.yOffs * pl->xform.yScale); planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); // p is the texture origin in view space @@ -1810,42 +1454,22 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else { - R_SetDSColorMapLight(basecolormap, 0, 0); + DrawerContext::SetSpanLight(basecolormap, 0, 0); plane_shade = true; } - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - -#if defined(X86_ASM) - if (!r_swtruecolor) - { - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM(ds_source); - R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); - } - else - { - R_MapVisPlane(pl, R_MapTiltedPlane); - } -#else - R_MapVisPlane (pl, R_MapTiltedPlane); -#endif + DrawerContext::SetTiltedSpanState(plane_sz, plane_su, plane_sv, plane_shade, planelightfloat, pviewx, pviewy); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawTiltedSpan(y, x1, spanend[y]); }); } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index b199d3477..d4db3dc09 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,14 +93,6 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); -extern void(*R_MapColoredPlane)(int y, int x1); -extern 
void(*R_MapTiltedPlane)(int y, int x1); - -void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_rgba(int y, int x); -void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_rgba(int y, int x1); - visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f9..2476e07ac 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -52,6 +52,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_drawer_context.h" #define WALLYREPEAT 8 @@ -172,19 +173,19 @@ CVAR(Bool, r_drawmirrors, true, 0) float *MaskedSWall; float MaskedScaleY; -static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) +static void BlastMaskedColumn (int x, void (*blastfunc)(int x, const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) { // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap, rw_light, wallshade); + DrawerContext::SetLight(basecolormap, rw_light, wallshade); } - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + DrawerContext::SetTextureStep(xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY)); if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; // killough 1/25/98: here's where Medusa came in, because // it implicitly assumed that the column was all one patch. @@ -194,10 +195,12 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // the Medusa effect. The fix is to construct true columns // when forming multipatched textures (see r_data.c). 
+ DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + // draw the texture const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); - blastfunc (pixels, spans); + const BYTE *pixels = tex->GetColumn (maskedtexturecol[x] >> FRACBITS, &spans); + blastfunc (x, pixels, spans); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -243,7 +246,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] modified because we don't use user-definable translucency maps ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0); if ((drawmode == DontDraw && !ds->bFogBoundary && !ds->bFakeBoundary)) @@ -295,7 +298,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); + DrawerContext::DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); if (ds->maskedtexturecol == -1) { goto clearfog; @@ -313,9 +316,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -326,11 +329,11 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } if (curline->linedef->flags & ML_DONTPEGBOTTOM) { - dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + 
texheight; + DrawerContext::SetTextureMid(MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight); } else { - dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + DrawerContext::SetTextureMid(MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling))); } rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); @@ -349,21 +352,21 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid += rowoffset - ViewPos.Z; - textop = dc_texturemid; - dc_texturemid *= MaskedScaleY; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset - ViewPos.Z); + textop = DrawerContext::TextureMid(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. 
- textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + textop = DrawerContext::TextureMid() + rowoffset / MaskedScaleY - ViewPos.Z; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } if (sprflipvert) { MaskedScaleY = -MaskedScaleY; - dc_texturemid -= tex->GetHeight() << FRACBITS; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - tex->GetHeight()); } // [RH] Don't bother drawing segs that are completely offscreen @@ -438,9 +441,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // draw the columns one at a time if (drawmode == DoDraw0) { - for (dc_x = x1; dc_x < x2; ++dc_x) + for (int x = x1; x < x2; ++x) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); } } else @@ -451,29 +454,29 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) if (x1 >= x2) goto clearfog; - dc_x = x1; + int x = x1; - while ((dc_x < stop) && (dc_x & 3)) + while ((x < stop) && (x & 3)) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } - while (dc_x < stop) + while (x < stop) { - rt_initcols(nullptr); - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); - rt_draw4cols (dc_x - 3); - dc_x++; + DrawerContext::RtInitCols(nullptr); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 1, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 2, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 3, DrawerContext::DrawMaskedColumnHoriz, tex); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { - BlastMaskedColumn 
(R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } } } @@ -483,13 +486,13 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z + rowoffset) * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } WallC.sz1 = ds->sz1; @@ -535,7 +538,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } clearfog: - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); if (ds->bFakeBoundary & 3) { R_RenderFakeWallRange(ds, x1, x2); @@ -567,11 +570,11 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0); if(drawmode == DontDraw) { - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); return; } @@ -613,26 +616,26 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) { rowoffset += rw_pic->GetHeight(); } - dc_texturemid = (planez - ViewPos.Z) * yscale; + DrawerContext::SetTextureMid((planez - ViewPos.Z) * yscale); if (rw_pic->bWorldPanning) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. 
- dc_texturemid = dc_texturemid + rowoffset * yscale; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset * yscale); rw_offset = xs_RoundToInt(rw_offset * xscale); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid += rowoffset; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset); } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -660,7 +663,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) PrepLWall (lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); wallscan_np2_ds(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale); - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } // kg3D - walls of fake floors @@ -1065,360 +1068,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two - { - int count = y2 - y1; - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = uv_start; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; - } - - return uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = source[i]; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; - } - - left -= count; - } - } -} - -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, 
draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - uint32_t uv_pos[4], uv_step[4]; - for (int i = 0; i < 4; i++) - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - palookupoffse[i] = basecolormap->Maps; - palookuplight[i] = LIGHTSCALE(lights[i], wallshade); - } - 
else - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - palookuplight[i] = 0; - } - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); - } - - NetUpdate (); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
- { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1444,7 +1093,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi { down[j] = clamp (most3[j], up[j], dwal[j]); } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, down, swal, lwal, yrepeat, rw_pic, rw_offset); up = down; down = (down == most1) ? 
most2 : most1; } @@ -1455,7 +1104,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); } - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); basecolormap = startcolormap; wallshade = startshade; } @@ -1464,20 +1113,20 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, { if (mask) { - if (colfunc == basecolfunc) + if (DrawerContext::IsBaseColumn()) { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawTransMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { @@ -1513,14 +1162,14 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t if (yrepeat >= 0) { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = top - fmod(top - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); if (partition == top) { partition -= scaledtexheight; } up = uwal; down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition > bot) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1535,16 +1184,16 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); } else { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = bot - fmod(bot - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); up = most1; down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition < top) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1559,7 +1208,7 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); } @@ -1612,9 +1261,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -1695,7 +1344,7 @@ void R_RenderSegLoop () { // one sided line if (midtexture->UseType != FTexture::TEX_Null && viewactive) { - dc_texturemid = rw_midtexturemid; + DrawerContext::SetTextureMid(rw_midtexturemid); rw_pic = midtexture; xscale = rw_pic->Scale.X * rw_midtexturescalex; yscale = rw_pic->Scale.Y * rw_midtexturescaley; @@ -1738,7 +1387,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_toptexturemid; + DrawerContext::SetTextureMid(rw_toptexturemid); rw_pic = toptexture; xscale = 
rw_pic->Scale.X * rw_toptexturescalex; yscale = rw_pic->Scale.Y * rw_toptexturescaley; @@ -1784,7 +1433,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_bottomtexturemid; + DrawerContext::SetTextureMid(rw_bottomtexturemid); rw_pic = bottomtexture; xscale = rw_pic->Scale.X * rw_bottomtexturescalex; yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; @@ -2974,7 +2623,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } yscale = decal->ScaleY; - dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + DrawerContext::SetTextureMid(WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale); // Clip sprite to drawseg x1 = MAX(clipper->x1, x1); @@ -3011,11 +2660,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -3024,7 +2673,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { sprflipvert = true; yscale = -yscale; - dc_texturemid -= WallSpriteTile->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - WallSpriteTile->GetHeight()); } else { @@ -3034,10 +2683,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, MaskedScaleY = float(1 / yscale); do { - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + mode = DrawerContext::SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, 
decal->Translation, decal->AlphaColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -3053,48 +2701,50 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - R_WallSpriteColumn (R_DrawMaskedColumnHoriz); - dc_x++; + R_WallSpriteColumn (x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } } @@ -3103,14 +2753,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, // needrepeat will be 0, and the while will fail. 
mceilingclip = floorclip; mfloorclip = wallbottom; - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); } while (needrepeat--); - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - - R_FinishSetPatchStyle (); + DrawerContext::SetBaseStyle(); + DrawerContext::FinishSetPatchStyle (); done: WallC = savecoord; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236d..480cdd02b 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -32,6 +32,7 @@ ** */ +#define DRAWER_INTERNALS #include "r_local.h" #include "v_palette.h" diff --git a/src/r_things.cpp b/src/r_things.cpp index 836f58690..b856b968a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,7 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" @@ -229,12 +229,6 @@ vissprite_t *R_NewVisSprite (void) return *(vissprite_p-1); } -// -// R_DrawMaskedColumn -// Used for sprites and masked mid textures. -// Masked means: partly transparent, i.e. stored -// in posts/runs of opaque pixels. -// short* mfloorclip; short* mceilingclip; @@ -243,88 +237,6 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - const fixed_t centeryfrac = FLOAT2FIXED(CenterY); - const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length)) - 1; - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - FixedMul (centeryfrac, dc_iscale) - texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - FixedMul (centeryfrac-FRACUNIT, dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - colfunc (); - } -nextpost: - span++; - } -} - // [ZZ] // R_ClipSpriteColumnWithPortals // @@ -361,7 +273,7 @@ static inline void R_CollectPortals() } } -static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) +bool 
R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. if (CurrentPortalInSkybox) @@ -380,7 +292,7 @@ static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) continue; // now if current column is covered by this drawseg, we clip it away - if ((dc_x >= seg->x1) && (dc_x < seg->x2)) + if ((x >= seg->x1) && (x < seg->x2)) return true; } @@ -409,15 +321,15 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + mode = DrawerContext::SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(DrawerContext::LightColormap(), 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -436,65 +348,67 @@ void R_DrawVisSprite (vissprite_t *vis) tex = vis->pic; spryscale = vis->yscale; sprflipvert = false; - dc_iscale = FLOAT2FIXED(1 / vis->yscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / vis->yscale)); frac = vis->startfrac; xiscale = vis->xiscale; - dc_texturemid = vis->texturemid; + DrawerContext::SetTextureMid(vis->texturemid); if (vis->renderflags & RF_YFLIP) { sprflipvert = true; spryscale = -spryscale; - dc_iscale = -dc_iscale; - dc_texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + dc_texturemid * spryscale; + DrawerContext::SetTextureStep(-DrawerContext::TextureStep()); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - vis->pic->GetHeight()); + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; } else { sprflipvert = false; - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; } - dc_x = vis->x1; + int x = vis->x1; x2 = vis->x2; - if (dc_x < x2) + if (x < x2) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumnHoriz 
(pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x + zz, vis)) + DrawerContext::DrawMaskedColumnHoriz (x + zz, pixels, spans); frac += xiscale; } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } } } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); NetUpdate (); } @@ -511,7 +425,7 @@ void R_DrawWallSprite(vissprite_t *spr) WallT.InitFromWallCoords(&spr->wallc); PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); iyscale = 1 / spr->yscale; - dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; + DrawerContext::SetTextureMid((spr->gzt - ViewPos.Z) * iyscale); if (spr->renderflags & RF_XFLIP) { int right = (spr->pic->GetWidth() << FRACBITS) - 1; @@ -539,11 +453,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -553,7 +467,7 @@ void R_DrawWallSprite(vissprite_t *spr) { sprflipvert = true; iyscale = -iyscale; - dc_texturemid -= spr->pic->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - spr->pic->GetHeight()); } else { @@ 
-562,10 +476,9 @@ void R_DrawWallSprite(vissprite_t *spr) MaskedScaleY = (float)iyscale; - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + mode = DrawerContext::SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -581,71 +494,74 @@ void R_DrawWallSprite(vissprite_t *spr) { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumnHoriz); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x + zz, spr)) + R_WallSpriteColumn(x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if 
(!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)) +void R_WallSpriteColumn (int x, void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)) { - float iscale = swall[dc_x] * MaskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); + float iscale = swall[x] * MaskedScaleY; + DrawerContext::SetTextureStep(FLOAT2FIXED(iscale)); spryscale = 1 / iscale; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; const BYTE *column; const FTexture::Span *spans; - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); - dc_texturefrac = 0; - drawfunc (column, spans); + column = WallSpriteTile->GetColumn (lwall[x] >> FRACBITS, &spans); + DrawerContext::SetTextureFrac(0); + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + drawfunc (x, column, spans); rw_light += rw_lightstep; } @@ -655,18 +571,18 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + DrawerContext::SetLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); + mode = DrawerContext::SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) + if (DrawerContext::IsFuzzColumn() || DrawerContext::IsFillColumn()) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } - else if (colfunc != basecolfunc) + else if (!DrawerContext::IsBaseColumn()) { flags = DVF_OFFSCREEN; } @@ -692,32 +608,32 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) { - rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); + DrawerContext::RtInitCols(OffscreenColorBuffer + x * OffscreenBufferHeight); } for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) { if (flags & DVF_SPANSONLY) { - dc_x = x; - dc_yl = span->Start; - dc_yh = span->Stop - 1; - dc_count = span->Stop - span->Start; - dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; - colfunc(); + DrawerContext::SetX(x); + DrawerContext::SetY1(span->Start); + DrawerContext::SetY2(span->Stop - 1); + DrawerContext::SetDrawCount(span->Stop - span->Start); + DrawerContext::SetDest(x, span->Start); + DrawerContext::DrawColumn(); } else { - rt_span_coverage(x, span->Start, span->Stop - 1); + DrawerContext::RtSpanCoverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) { - rt_draw4cols(x - 3); + DrawerContext::DrawRt4cols(x - 3); } } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); NetUpdate(); } @@ -2585,7 +2501,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, } } -static void 
R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) +void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) { const int x1 = vis->x1; const int x2 = vis->x2; @@ -2610,120 +2526,24 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle_C (vissprite_t *vis) +void R_DrawParticle(vissprite_t *vis) { - DWORD *bg2rgb; - int spacing; - BYTE *dest; - DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; - int ycount = vis->y2 - yl + 1; - int x1 = vis->x1; - int countbase = vis->x2 - x1; - - R_DrawMaskedSegsBehindParticle (vis); - - // vis->renderflags holds translucency level (0-255) - { - fixed_t fglevel, bglevel; - DWORD *fg2rgb; - - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ - - // original was row-wise - // width = countbase - // height = ycount - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1+countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += spacing; - } - } -} - -void R_DrawParticle_rgba(vissprite_t *vis) -{ - int spacing; - uint32_t *dest; - BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; - int yl = vis->y1; - int ycount = vis->y2 - yl + 1; + int yh = vis->y2; int x1 = vis->x1; int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); - - 
DrawerCommandQueue::WaitForWorkers(); - - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); + int alpha = vis->renderflags; for (int x = x1; x < (x1 + countbase); x++) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) + if (R_ClipSpriteColumnWithPortals(x, vis)) continue; - dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } + DrawerContext::FillTransColumn(x, yl, yh, color, alpha); } } @@ -2769,9 +2589,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); - - int pixelsize = r_swtruecolor ? 4 : 1; + DrawerContext::SetSlabLight(colormap); // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); @@ -3026,25 +2844,25 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); + DrawerContext::DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, lxt + xxl, z1); } else { // Record the area covered and possibly draw to an offscreen buffer. - dc_yl = z1; - dc_yh = z2 - 1; - dc_count = z2 - z1; - dc_iscale = yinc; + DrawerContext::SetY1(z1); + DrawerContext::SetY2(z2 - 1); + DrawerContext::SetDrawCount(z2 - z1); + DrawerContext::SetTextureStep(yinc); for (int x = xxl; x < xxr; ++x) { OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); if (!(flags & DVF_SPANSONLY)) { - dc_x = lxt + x; - rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); - dc_source = col; - dc_texturefrac = yplc[xxl]; - hcolfunc_pre(); + DrawerContext::RtInitCols(OffscreenColorBuffer + ((lxt + x) & ~3) * OffscreenBufferHeight); + DrawerContext::SetX(lxt + x); + DrawerContext::SetSource(col); + DrawerContext::SetTextureFrac(yplc[xxl]); + DrawerContext::DrawHColumnPre(); } } } diff --git a/src/r_things.h b/src/r_things.h index f5cd30e00..869de4da2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -96,11 +96,8 @@ struct vissprite_t struct particle_t; -extern void(*R_DrawParticle)(vissprite_t *); -void R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_rgba (vissprite_t *); - void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); +void R_DrawParticle (vissprite_t *vis); extern int MaxVisSprites; @@ -112,7 +109,6 @@ extern vissprite_t **vissprite_p; extern short zeroarray[MAXWIDTH]; extern short screenheightarray[MAXWIDTH]; -// vars for R_DrawMaskedColumn extern short* mfloorclip; extern short* mceilingclip; extern double spryscale; @@ -126,8 +122,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans); -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); +void R_WallSpriteColumn (int x, 
void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad047..21cbd1a33 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -43,8 +43,7 @@ #include "r_defs.h" #include "r_utility.h" #ifndef NO_SWRENDER -#include "r_draw.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "r_main.h" #include "r_things.h" #endif @@ -130,12 +129,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - if (r_swtruecolor != IsBgra()) + DCanvas *destorgsave = DrawerContext::Canvas(); + if (screen->GetBuffer() == NULL) { - r_swtruecolor = IsBgra(); - R_InitColumnDrawers(); + I_FatalError("Attempt to write to buffer of hardware canvas"); } + DrawerContext::SetCanvas(screen); + if (parms.masked) { spanptr = &spans; @@ -172,22 +173,15 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetTranslationMap((lighttable_t *)translation); + DrawerContext::SetTranslationMap((lighttable_t *)translation); } else { - R_SetTranslationMap(identitymap); + DrawerContext::SetTranslationMap(nullptr); } - fixedcolormap = dc_fcolormap; - ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - - BYTE *destorgsave = dc_destorg; - dc_destorg = screen->GetBuffer(); - if (dc_destorg == NULL) - { - I_FatalError("Attempt to write to buffer of hardware canvas"); - } + fixedcolormap = DrawerContext::LightColormap(); + ESPSResult mode = DrawerContext::SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; @@ -220,11 +214,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms 
&parms) assert(spryscale > 0); sprflipvert = false; - //dc_iscale = FLOAT2FIXED(iyscale); - //dc_texturemid = (-y0) * iyscale; - //dc_iscale = 0xffffffffu / (unsigned)spryscale; - dc_iscale = FLOAT2FIXED(1 / spryscale); - dc_texturemid = (CenterY - 1 - sprtopscreen) * dc_iscale / 65536; + //DrawerContext::SetTextureStep(FLOAT2FIXED(iyscale)); + //DrawerContext::SetTextureMid((-y0) * iyscale); + //DrawerContext::SetTextureStep(0xffffffffu / (unsigned)spryscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / spryscale)); + DrawerContext::SetTextureMid((CenterY - 1 - sprtopscreen) * DrawerContext::TextureStep() / 65536); fixed_t frac = 0; double xiscale = img->GetWidth() / parms.destwidth; double x2 = x0 + parms.destwidth; @@ -278,14 +272,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) mode = DoDraw0; } - dc_x = int(x0); + int x = int(x0); int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); if (mode == DoDraw0) { // One column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1` { @@ -293,42 +287,44 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) stop4 = x2_i & ~3; } - if (dc_x < x2_i) + if (x < x2_i) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumnHoriz(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumnHoriz(x + zz, pixels, spans); frac += xiscale_i; } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2_i) + while (x < 
x2_i) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } } CenterY = centeryback; } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); - dc_destorg = destorgsave; + DrawerContext::SetCanvas(destorgsave); if (ticdup != 0 && menuactive == MENU_Off) { @@ -1024,9 +1020,11 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (IsBgra()) { + int inv_level = 64 - level; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = GPalette.BaseColors[basecolor].d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1035,9 +1033,9 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) uint32_t bg_green = (*spot >> 8) & 0xff; uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * level + bg_red * inv_level + 1) / 64; + uint32_t green = (fg_green * level + bg_green * inv_level + 1) / 64; + uint32_t blue = (fg_blue * level + bg_blue * inv_level + 1) / 64; *spot = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -1399,16 +1397,15 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. - R_SetupSpanBits(tex); if (colormap) - R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + DrawerContext::SetSpanLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); - scalex = double(1u << (32 - ds_xbits)) / scalex; - scaley = double(1u << (32 - ds_ybits)) / scaley; - ds_xstep = xs_RoundToInt(cosrot * scalex); - ds_ystep = xs_RoundToInt(sinrot * scaley); + DrawerContext::SetSpanLight(nullptr, 0, 0); + DrawerContext::SetSpanSource(tex); + scalex = double(1u << (32 - DrawerContext::SpanXBits())) / scalex; + scaley = double(1u << (32 - DrawerContext::SpanYBits())) / scaley; + DrawerContext::SetSpanXStep(xs_RoundToInt(cosrot * scalex)); + DrawerContext::SetSpanYStep(xs_RoundToInt(sinrot * scaley)); // Travel down the right edge and create an outline of that edge. pt1 = toppt; @@ -1472,9 +1469,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, #if 0 memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); #else - ds_y = y; - ds_x1 = x1; - ds_x2 = x2 - 1; + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2 - 1); DVector2 tex(x1 - originx, y - originy); if (dorotate) @@ -1483,10 +1480,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, tex.X = t * cosrot - tex.Y * sinrot; tex.Y = tex.Y * cosrot + t * sinrot; } - ds_xfrac = xs_RoundToInt(tex.X * scalex); - ds_yfrac = xs_RoundToInt(tex.Y * scaley); + DrawerContext::SetSpanXFrac(xs_RoundToInt(tex.X * scalex)); + DrawerContext::SetSpanYFrac(xs_RoundToInt(tex.Y * scaley)); - R_DrawSpan(); + DrawerContext::DrawSimplePolySpan(); #endif } x += xinc; From 70dbde4f78a99f167c3dcfc41491cb3f5cfc6cc8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 07:38:05 +0200 Subject: [PATCH 47/94] Added r_multithreaded CVAR and fixed some broken colors --- src/r_draw_rgba.cpp | 2057 +++++++++++++++++++++--------------------- src/r_draw_rgba.h | 8 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1314 insertions(+), 1291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 420b63dff..3e4bf241a 100644 
--- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -229,28 +231,28 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -260,28 +262,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. 
- dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -296,20 +298,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -317,18 +319,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * 
thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -341,20 +343,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -362,14 +364,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -392,20 +394,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ 
-413,14 +415,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -443,20 +445,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -464,14 +466,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -494,20 +496,20 @@ public: class 
FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -515,14 +517,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,22 +547,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -571,24 +573,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. 
- if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -622,32 +624,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -657,25 +659,25 @@ 
public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -703,28 +705,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + 
_light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -734,23 +736,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -764,28 +766,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - 
dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -795,25 +801,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -842,28 +848,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; 
- int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -872,25 +878,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -915,30 +921,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - 
fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -948,23 +954,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + 
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -991,32 +997,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1026,24 +1032,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = 
dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1070,30 +1076,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1103,23 +1109,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1146,32 +1152,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; 
+ _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1181,24 +1187,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1225,30 +1231,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: 
DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1258,22 +1264,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1300,32 +1306,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; 
- int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1335,24 +1341,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * 
thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1379,42 +1385,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1422,24 +1428,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = 
_yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1458,9 +1464,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1479,7 +1485,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1487,24 +1493,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1604,9 +1610,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1700,42 +1706,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1743,24 +1749,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1780,9 +1786,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1803,42 +1809,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1846,27 +1856,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1894,9 +1904,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1925,42 +1935,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1968,27 +1982,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2021,9 +2035,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2057,42 +2071,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2100,27 +2118,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2148,9 +2166,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2179,42 +2197,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2222,27 +2244,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2275,9 +2297,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2311,33 +2333,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2345,45 +2367,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2396,10 +2418,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2409,10 +2431,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2426,12 +2448,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2440,11 +2462,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2463,24 +2485,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2545,45 +2567,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2600,10 +2622,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2613,10 +2635,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2630,12 +2652,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2644,11 +2666,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2668,12 +2690,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2681,11 +2703,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2756,52 +2778,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2831,12 +2853,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2846,12 +2868,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2864,12 +2886,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2878,14 
+2900,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2923,52 +2945,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2998,12 +3020,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3013,12 +3035,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3031,12 +3053,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3045,14 +3067,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3090,52 +3112,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3165,12 +3187,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3180,12 +3202,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3198,12 +3220,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3212,14 +3234,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3257,52 +3279,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3332,12 +3354,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3347,12 +3369,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3365,12 +3387,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3379,14 +3401,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3427,9 +3449,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3438,9 +3460,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3452,10 +3474,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3499,10 +3521,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - const BYTE *ds_source; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ 
-3511,8 +3533,8 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_source = ::ds_source; + _destorg = dc_destorg; + _source = ds_source; } void Execute(DrawerThread *thread) override @@ -3527,8 +3549,8 @@ public: // Slopes are broken currently in master. // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - uint32_t *source = (uint32_t*)ds_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = x2 - x1 + 1; while (count > 0) @@ -3544,9 +3566,9 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + fixed_t _light; + int _color; public: DrawColoredSpanRGBACommand(int y, int x1, int x2) @@ -3555,9 +3577,9 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override @@ -3569,10 +3591,10 @@ public: int x1 = _x1; int x2 = _x2; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3585,10 +3607,9 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *dc_destorg; - int dc_pitch; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) @@ -3599,8 +3620,8 @@ public: _color = color; _a = a; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + 
_destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -3627,8 +3648,8 @@ public: fg_green *= alpha; fg_blue *= alpha; - int spacing = dc_pitch * thread->num_cores; - uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); for (int y = 0; y < ycount; y++) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a91b54d74..6e35de9ff 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -178,17 +178,19 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -232,7 +234,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4b6605b4a..0eabc48d8 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -61,8 +61,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -72,8 +72,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -87,9 +87,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -123,11 +123,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,11 +137,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -156,15 +156,15 @@ 
public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -188,11 +188,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -201,11 +201,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -221,15 +221,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 
4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -268,16 +268,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -509,13 +509,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -525,13 +525,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void 
Execute(DrawerThread *thread) override @@ -546,17 +546,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -585,13 +585,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -600,13 +600,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -622,17 +622,17 @@ public: if 
(count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -670,19 +670,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -766,11 +766,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t 
*dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -780,11 +780,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -800,13 +800,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -835,11 +835,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -848,11 +848,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; } 
#ifdef NO_SSE @@ -869,13 +869,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -914,13 +914,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -959,12 +959,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -974,12 +974,12 @@ public: 
this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -994,16 +994,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1031,12 +1031,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1045,12 +1045,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = 
::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1066,16 +1066,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1112,18 +1112,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ 
-1207,12 +1207,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1222,12 +1222,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1242,16 +1242,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1279,12 +1279,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t 
dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1293,12 +1293,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1313,16 +1313,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1355,12 +1355,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp1colRGBACommand(int hx, int sx, int 
yl, int yh) @@ -1370,12 +1370,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1390,16 +1390,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1427,12 +1427,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1441,12 +1441,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - 
dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1461,16 +1461,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1515,29 +1515,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -1546,13 +1546,13 
@@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1589,34 +1589,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From fee8650357e87ee6fc19a622133ec95d9ca9f364 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:02:41 +0200 Subject: [PATCH 48/94] Add r_multithreaded and fix color issue --- src/r_draw_rgba.cpp | 2159 +++++++++++++++++++++++------------------- src/r_draw_rgba.h | 4 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1439 insertions(+), 1264 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b4..722fbb8cd 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -56,6 +56,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -227,28 +229,28 @@ void 
DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -258,28 +260,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. 
- fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -294,20 +296,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -315,18 +317,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -339,20 +341,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int 
_color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -360,14 +362,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -390,20 +392,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -411,14 +413,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - 
uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -441,20 +443,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -462,14 +464,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -492,20 +494,20 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = 
dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -513,14 +515,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -543,22 +545,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -569,24 +571,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -620,32 +622,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -655,25 +657,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, 
(uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -701,28 +703,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void 
Execute(DrawerThread *thread) override @@ -732,23 +734,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -762,28 +764,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + 
_shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -793,25 +799,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -840,28 +846,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; - int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() 
{ - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -870,25 +876,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -913,30 +919,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t 
_light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -946,23 +952,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -989,32 +995,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - 
BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1024,24 +1030,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = 
_translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1068,30 +1074,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1101,23 +1107,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1144,32 +1150,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void 
Execute(DrawerThread *thread) override @@ -1179,24 +1185,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1223,30 +1229,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - 
dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1256,22 +1262,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1298,32 +1304,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD 
_texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1333,24 +1339,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1377,42 +1383,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1420,24 +1426,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + 
xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1456,9 +1462,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1477,7 +1483,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1485,24 +1491,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1602,9 +1608,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1698,42 +1704,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1741,24 +1747,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1778,9 +1784,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1801,42 +1807,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1844,27 +1854,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1892,9 +1902,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1923,42 +1933,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1966,27 +1980,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2019,9 +2033,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2055,42 +2069,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2098,27 +2116,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2146,9 +2164,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2177,42 +2195,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2220,27 +2242,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2273,9 +2295,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2309,33 +2331,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2343,45 +2365,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2394,10 +2416,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2407,10 +2429,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2424,12 +2446,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2438,11 +2460,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2461,24 +2483,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2543,45 +2565,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2598,10 +2620,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2611,10 +2633,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2628,12 +2650,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2642,11 +2664,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2666,12 +2688,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2679,11 +2701,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2754,52 +2776,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2829,12 +2851,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2844,12 +2866,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2862,12 +2884,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2876,14 
+2898,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2921,52 +2943,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2996,12 +3018,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3011,12 +3033,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3029,12 +3051,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3043,14 +3065,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3088,52 +3110,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3163,12 +3185,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3178,12 +3200,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3196,12 +3218,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3210,14 +3232,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3255,52 +3277,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3330,12 +3352,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3345,12 +3367,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3363,12 +3385,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3377,14 +3399,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3425,9 +3447,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3436,9 +3458,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3450,10 +3472,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3492,6 +3514,157 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _source = 
ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + int _color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *_destorg; + int _pitch; + fixed_t _light; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + _destorg = dc_destorg; + _pitch = dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = GPalette.BaseColors[color].d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a 
+ 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260..0ab3e298a 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -184,6 +184,8 @@ public: virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -227,7 +229,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward<Types>(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8d..2311cb447 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,8 +59,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -70,8 +70,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -85,9 +85,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -121,11 +121,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -135,11 +135,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -154,15 +154,15 @@ 
public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -186,11 +186,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -199,11 +199,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -219,15 +219,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 
4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -266,16 +266,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -523,13 +523,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void 
Execute(DrawerThread *thread) override @@ -544,17 +544,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -583,13 +583,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -598,13 +598,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -620,17 +620,17 @@ public: if 
(count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -668,19 +668,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -764,11 +764,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t 
*dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -778,11 +778,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -798,13 +798,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -833,11 +833,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -846,11 +846,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; } 
#ifdef NO_SSE @@ -867,13 +867,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -912,13 +912,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -957,12 +957,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -972,12 +972,12 @@ public: 
this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -992,16 +992,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1029,12 +1029,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1043,12 +1043,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = 
::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1064,16 +1064,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1110,18 +1110,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ 
-1205,12 +1205,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1220,12 +1220,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1240,16 +1240,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1277,12 +1277,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t 
dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1291,12 +1291,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1311,16 +1311,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1353,12 +1353,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp1colRGBACommand(int hx, int sx, int 
yl, int yh) @@ -1368,12 +1368,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1388,16 +1388,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1425,12 +1425,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1439,12 +1439,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - 
dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1459,16 +1459,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1513,29 +1513,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -1544,13 +1544,13 
@@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1587,34 +1587,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From 3089043b07c206db4a2d05cb27639378147d2851 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:28:30 +0200 Subject: [PATCH 49/94] Fixed typo --- src/r_draw_rgba.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0ab3e298a..15a76c689 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -173,12 +173,12 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; From 000008e04dfa50fa5443d005d8076511dba1ca46 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:14:33 +0200 Subject: 
[PATCH 50/94] Fixed empty canvas in kdizd intermission screen --- src/r_swrenderer.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236d..c788dfd54 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -370,7 +370,30 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); } } - tex->SetUpdated(); + + if (r_swtruecolor) + { + // True color render still sometimes uses palette textures (for sprites, mostly). + // We need to make sure that both pixel buffers contain data: + int width = tex->GetWidth(); + int height = tex->GetHeight(); + BYTE *palbuffer = (BYTE *)tex->GetPixels(); + uint32_t *bgrabuffer = (uint32_t*)tex->GetPixelsBgra(); + for (int x = 0; x < width; x++) + { + for (int y = 0; y < height; y++) + { + uint32_t color = bgrabuffer[y]; + int r = RPART(color); + int g = GPART(color); + int b = BPART(color); + palbuffer[y] = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + } + palbuffer += height; + bgrabuffer += height; + } + } + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 5963f29afd0906d7c4d1c99f116e78907c60dac4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:16:34 +0200 Subject: [PATCH 51/94] Added missing SetUpdated --- src/r_swrenderer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c788dfd54..c81d2a110 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -394,6 +394,8 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin } } + tex->SetUpdated(); + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 12a50c140c7656ad944df027bd9f5c332f48f698 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:47:30 +0200 Subject: [PATCH 
52/94] Fix animated textures not updating in swtruecolor mode --- src/g_strife/strife_sbar.cpp | 11 +++++++++++ src/menu/playerdisplay.cpp | 11 +++++++++++ src/textures/textures.h | 1 + src/textures/warptexture.cpp | 12 ++++++++++++ 4 files changed, 35 insertions(+) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index eb3fa2608..e1fcb3cda 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,6 +34,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -115,6 +116,16 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } +const uint32_t *FHealthBar::GetPixelsBgra() +{ + if (NeedRefresh) + { + MakeTexture(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + void FHealthBar::SetVial (int level) { if (level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 16671975a..7b7e9ca5d 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,6 +78,7 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); + const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -246,6 +247,16 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } +const uint32_t *FBackdropTexture::GetPixelsBgra() +{ + if (LastRenderTic != gametic) + { + Render(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. 
diff --git a/src/textures/textures.h b/src/textures/textures.h index 38d1ef487..3b4b0b8b3 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -484,6 +484,7 @@ public: virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index b6977dd77..0d18ab58f 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -93,6 +93,18 @@ const BYTE *FWarpTexture::GetPixels () return Pixels; } +const uint32_t *FWarpTexture::GetPixelsBgra() +{ + DWORD time = r_FrameTime; + + if (Pixels == NULL || time != GenTime) + { + MakeTexture(time); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) { DWORD time = r_FrameTime; From 35c078dc1e0f4dc67d0ed3cd09f292e257fb1b9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 11:24:21 +0200 Subject: [PATCH 53/94] Screenshot fix --- src/v_video.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v_video.cpp b/src/v_video.cpp index 2cf04a29d..bcd49f920 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -445,7 +445,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo { Lock(true); buffer = GetBuffer(); - pitch = GetPitch(); + pitch = IsBgra() ? GetPitch() * 4 : GetPitch(); color_type = IsBgra() ? 
SS_BGRA : SS_PAL; } From f53e468f3f32fcb544842e86be5c06a63b0c3f31 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 12:38:00 +0200 Subject: [PATCH 54/94] Fixed fill column rgba drawers --- src/r_draw.cpp | 11 +++-- src/r_draw.h | 1 + src/r_draw_rgba.cpp | 114 ++++++++++++++++++++++++++++---------------- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6b..7829e2b77 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ fixed_t dc_iscale; fixed_t dc_texturefrac; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; +uint32_t dc_srccolor_bgra; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups fixed_t dc_srcalpha; // Alpha value used by dc_srcblend @@ -2702,10 +2703,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, if (style.Flags & STYLEF_ColorIsFixed) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; if (style.Flags & STYLEF_InvertSource) @@ -2714,6 +2715,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, g = 255 - g; b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; diff --git a/src/r_draw.h b/src/r_draw.h index a31183405..99ee4d10d 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -63,6 +63,7 @@ extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; +extern "C" uint32_t dc_srccolor_bgra; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; extern "C" fixed_t dc_srcalpha; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 722fbb8cd..491c6ab98 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -344,8 +344,7 @@ class FillAddColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; - int _color; + uint32_t _srccolor; public: FillAddColumnRGBACommand() @@ -353,8 +352,7 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; - _color = dc_color; + _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override @@ -369,10 +367,18 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = fg >> 24; + fg_alpha += fg_alpha >> 7; + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + uint32_t inv_alpha = 256 - fg_alpha; do { @@ -380,9 +386,9 @@ public: uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * 
inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -395,8 +401,10 @@ class FillAddClampColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; int _color; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillAddClampColumnRGBACommand() @@ -404,8 +412,10 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; _color = dc_color; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -420,20 +430,26 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { - do - { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -447,7 +463,9 @@ class FillSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t _srccolor; + 
fixed_t _srcalpha; + fixed_t _destalpha; public: FillSubClampColumnRGBACommand() @@ -456,7 +474,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -471,20 +491,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -498,7 +523,9 @@ class FillRevSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillRevSubClampColumnRGBACommand() @@ -507,7 +534,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + _srccolor = 
dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -522,20 +551,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; From 822bbd5b9a015141d6ca17d19df8b8a8600f220b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 13:40:23 +0200 Subject: [PATCH 55/94] Fuzz (invisibility) adjustments --- src/r_draw_rgba.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 491c6ab98..bdbcd1250 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -584,8 +584,8 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _yh; BYTE *_destorg; int _pitch; - int fuzzpos; - int 
fuzzviewheight; + int _fuzzpos; + int _fuzzviewheight; public: DrawFuzzColumnRGBACommand() @@ -595,8 +595,8 @@ public: _yh = dc_yh; _destorg = dc_destorg; _pitch = dc_pitch; - fuzzpos = ::fuzzpos; - fuzzviewheight = ::fuzzviewheight; + _fuzzpos = fuzzpos; + _fuzzviewheight = fuzzviewheight; } void Execute(DrawerThread *thread) override @@ -609,8 +609,8 @@ public: _yl = 1; // .. and high. - if (_yh > fuzzviewheight) - _yh = fuzzviewheight; + if (_yh > _fuzzviewheight) + _yh = _fuzzviewheight; count = thread->count_for_thread(_yl, _yh - _yl + 1); @@ -622,7 +622,7 @@ public: int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -640,9 +640,9 @@ public: uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 7 / 8; + uint32_t green = bg_green * 7 / 8; + uint32_t blue = bg_blue * 7 / 8; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -3896,7 +3896,13 @@ void R_FillRevSubClampColumn_rgba() void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; + + if (dc_yl == 0) + dc_yl = 1; + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From 4ef2fb3cdb19d71a67c21c860f7d0bea96db8cb2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 14:45:52 +0200 Subject: [PATCH 56/94] Fixed multithreaded rendering issue with the fuzz effect --- src/r_draw_rgba.cpp | 75 +++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bdbcd1250..b1ee1f02c 100644 --- a/src/r_draw_rgba.cpp +++ 
b/src/r_draw_rgba.cpp @@ -604,26 +604,48 @@ public: int count; uint32_t *dest; - // Adjust borders. Low... - if (_yl == 0) - _yl = 1; + int yl = MAX(_yl, 1); + int yh = MIN(_yh, _fuzzviewheight); - // .. and high. - if (_yh > _fuzzviewheight) - _yh = _fuzzviewheight; - - count = thread->count_for_thread(_yl, _yh - _yl + 1); + count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; + yl += thread->skipped_by_thread(yl); + + // Handle the case where we would go out of bounds at the top: + if (yl < fuzzstep) + { + count--; + + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + fuzz %= FUZZTABLE; + } + + bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + if (lowerbounds) + count--; + + // Fuzz where fuzzoffset stays within bounds while (count > 0) { int available = (FUZZTABLE - fuzz); @@ -635,14 +657,14 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 7 / 8; - uint32_t green = bg_green * 7 / 8; - uint32_t blue = bg_blue * 7 / 8; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue 
* 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -651,6 +673,21 @@ public: fuzz %= FUZZTABLE; } + + // Handle the case where we would go out of bounds at the bottom + if (lowerbounds) + { + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } } }; @@ -3897,12 +3934,10 @@ void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - if (dc_yl == 0) - dc_yl = 1; - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From e7cdcd9c0af6d82c8acc7c14102fbc8c4e34da1e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 05:20:34 +0200 Subject: [PATCH 57/94] Change to one pass rendering to remove fuzz artifact --- src/r_draw_rgba.cpp | 6 ++++-- src/r_draw_rgba.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b1ee1f02c..6021c9265 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -624,8 +624,6 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - count--; - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; @@ -639,6 +637,10 @@ public: dest += pitch; fuzz += fuzzstep; fuzz %= FUZZTABLE; + + count--; + if (count == 0) + return; } bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 15a76c689..83977d65c 100644 --- a/src/r_draw_rgba.h +++ 
b/src/r_draw_rgba.h @@ -209,8 +209,8 @@ class DrawerCommandQueue int threaded_render = 0; DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; void StartThreads(); void StopThreads(); From 3e7eb79729049302bf80e79908af8a8006c3841c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 11:17:59 +0200 Subject: [PATCH 58/94] Added some experimental AVX2 drawers --- src/r_draw_rgba.cpp | 420 ++++++++++++++++++++++++++++++++++--------- src/r_draw_rgba.h | 91 ++++++++++ src/r_drawt_rgba.cpp | 46 ++--- 3 files changed, 454 insertions(+), 103 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 6021c9265..271250855 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -44,9 +44,14 @@ #include "x86.h" #ifndef NO_SSE #include +#include #endif #include +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -58,6 +63,8 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -230,14 +237,14 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _texturefrac; DWORD _iscale; fixed_t _light; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawColumnRGBACommand() @@ -297,7 +304,7 @@ public: class FillColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; fixed_t _light; int _pitch; int _color; @@ -342,7 +349,7 @@ public: class 
FillAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; uint32_t _srccolor; @@ -399,7 +406,7 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -460,7 +467,7 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -520,7 +527,7 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -582,7 +589,7 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _x; int _yl; int _yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; int _fuzzpos; int _fuzzviewheight; @@ -696,16 +703,16 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawAddColumnRGBACommand() @@ -779,11 +786,11 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; public: @@ -840,11 +847,11 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; fixed_t 
_srcalpha; fixed_t _destalpha; @@ -920,12 +927,12 @@ class DrawShadedColumnRGBACommand : public DrawerCommand { private: int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; fixed_t _light; - const BYTE *_source; - lighttable_t *_colormap; + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; int _color; int _pitch; @@ -993,10 +1000,10 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1069,11 +1076,11 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1148,10 +1155,10 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1224,16 +1231,16 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawSubClampTranslatedColumnRGBACommand() @@ -1303,10 +1310,10 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * 
RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1378,16 +1385,16 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawRevSubClampTranslatedColumnRGBACommand() @@ -1422,8 +1429,8 @@ public: frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = _translation; - const BYTE *source = _source; + BYTE * RESTRICT translation = _translation; + const BYTE * RESTRICT source = _source; int pitch = _pitch * thread->num_cores; uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; @@ -1456,7 +1463,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1466,7 +1473,7 @@ class DrawSpanRGBACommand : public DrawerCommand int _y; int _xbits; int _ybits; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -1539,6 +1546,181 @@ public: BYTE xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. 
+ xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. 
+ dest += 8; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE(fg, shade_constants); // full shade path, pairs with AVX2_SHADE_INIT above + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 8; // the 256-bit store wrote 8 pixels + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
@@ -1777,12 +1959,12 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -1880,12 +2062,12 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2006,12 +2188,12 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2142,12 +2324,12 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2268,12 +2450,12 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2407,7 +2589,7 @@ class FillSpanRGBACommand : public DrawerCommand int _x1; int _x2; int _y; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -2441,8 +2623,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int vlinebits; int _pitch; fixed_t _light; @@ -2489,7 
+2671,7 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2497,7 +2679,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Vlinec4RGBACommand() @@ -2553,6 +2735,84 @@ public: dest += pitch; } while (--count); } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (count & 1) + { + DWORD place; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } + count /= 2; + + 
// Assume all columns come from the same texture (which they do): + const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); + __m256i column_offsets = _mm256_set_epi32( + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); + + __m256i place = _mm256_set_epi32( + local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], + local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); + + __m256i step = _mm256_set_epi32( + local_vince[3], local_vince[2], local_vince[1], local_vince[0], + local_vince[3], local_vince[2], local_vince[1], local_vince[0]); + step = _mm256_add_epi32(step, step); + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + else + { + AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + } #else void Execute(DrawerThread *thread) override { @@ -2641,8 +2901,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int mvlinebits; int _pitch; fixed_t _light; @@ 
-2693,7 +2953,7 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2701,7 +2961,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Mvlinec4RGBACommand() @@ -2852,8 +3112,8 @@ class Tmvline1AddRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -2924,7 +3184,7 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2934,7 +3194,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Tmvline4AddRGBACommand() @@ -3019,8 +3279,8 @@ class Tmvline1AddClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3091,7 +3351,7 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3101,7 +3361,7 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -3186,8 +3446,8 @@ class Tmvline1SubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int 
tmvlinebits; int _pitch; fixed_t _light; @@ -3258,7 +3518,7 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3268,7 +3528,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4SubClampRGBACommand() @@ -3353,8 +3613,8 @@ class Tmvline1RevSubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3425,7 +3685,7 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3435,7 +3695,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3520,7 +3780,7 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -3592,10 +3852,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE *_source; + const BYTE * RESTRICT _source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ -3637,7 +3897,7 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -3678,7 +3938,7 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *_destorg; + BYTE * 
RESTRICT _destorg; int _pitch; fixed_t _light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 83977d65c..8f051b4cb 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -290,6 +290,17 @@ public: #endif #endif +// Promise compiler we have no aliasing of this pointer +#ifndef RESTRICT +#if defined(_MSC_VER) +#define RESTRICT __restrict +#elif defined(__GNUC__) +#define RESTRICT __restrict__ +#else +#define RESTRICT +#endif +#endif + // calculates the light constant passed to the shade_pal_index function FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) { @@ -413,6 +424,86 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculate constants for a simple shade +#define AVX2_SHADE_SIMPLE_INIT(light) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); + +// Calculate constants for a simple shade with different light levels for each pixel +#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); + +// Simple shade 8 pixels +#define AVX2_SHADE_SIMPLE(fg) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ + fg_hi = _mm256_srli_epi16(fg_hi, 8); \ + fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ + fg_lo = _mm256_srli_epi16(fg_lo, 8); \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define AVX2_SHADE_INIT(light, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Calculate constants for a complex shade with different light levels for each pixel +#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Complex shade 8 pixels +#define AVX2_SHADE(fg, shade_constants) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + \ + __m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \ + intensity = _mm256_unpacklo_epi16(intensity, intensity); \ + intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \ + intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \ + \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \ + \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), 
intensity_lo), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + + + + + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 2311cb447..269dd9d9d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,7 +59,7 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; public: @@ -123,9 +123,9 @@ class RtMap1colRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -188,9 +188,9 @@ class RtMap4colsRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -383,7 +383,7 @@ public: class RtTranslate1colRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int hx; int yl; int yh; @@ -447,7 +447,7 @@ public: class RtTranslate4colsRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int yl; int yh; @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) 
@@ -583,11 +583,11 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; fixed_t _srcalpha; fixed_t _destalpha; @@ -764,8 +764,8 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; - BYTE *_destorg; + lighttable_t * RESTRICT _colormap; + BYTE * RESTRICT _destorg; int _pitch; int _color; fixed_t _light; @@ -833,9 +833,9 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; + lighttable_t * RESTRICT _colormap; int _color; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; @@ -957,7 +957,7 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1029,7 +1029,7 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1205,7 +1205,7 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1277,7 +1277,7 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1353,7 +1353,7 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1425,7 +1425,7 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1497,7 +1497,7 @@ 
public: class RtInitColsRGBACommand : public DrawerCommand { - BYTE *buff; + BYTE * RESTRICT buff; public: RtInitColsRGBACommand(BYTE *buff) @@ -1516,7 +1516,7 @@ class DrawColumnHorizRGBACommand : public DrawerCommand int _count; fixed_t _iscale; fixed_t _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _x; int _yl; int _yh; From 3f905197d09e224db664264c9c8534985ca4c7df Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 07:40:01 +0200 Subject: [PATCH 59/94] Moved vectorized drawers to their own files --- src/r_draw_rgba.cpp | 701 +++-------------------------------------- src/r_draw_rgba.h | 161 +++++++--- src/r_draw_rgba_sse.h | 491 +++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 443 ++++---------------------- src/r_drawt_rgba_sse.h | 495 +++++++++++++++++++++++++++++ 5 files changed, 1212 insertions(+), 1079 deletions(-) create mode 100644 src/r_draw_rgba_sse.h create mode 100644 src/r_drawt_rgba_sse.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 271250855..28c5df2ac 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,10 +48,6 @@ #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX -#endif - extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -62,8 +58,38 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_linearlight, false, 0) -//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_draw_rgba_sse.h" + +// Generate AVX drawers: +#undef 
VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_draw_rgba_sse.h" + +#endif ///////////////////////////////////////////////////////////////////////////// @@ -1495,7 +1521,6 @@ public: _shade_constants = ds_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) @@ -1560,401 +1585,6 @@ public: } while (--count); } } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#else - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#endif }; class DrawSpanMaskedRGBACommand : public DrawerCommand @@ -2698,7 +2328,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -2735,165 +2364,6 @@ public: dest += pitch; } while (--count); } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (count & 1) - { - DWORD place; - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); 
local_vplce[3] = place + local_vince[3]; - dest += pitch; - } - count /= 2; - - // Assume all columns come from the same texture (which they do): - const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); - __m256i column_offsets = _mm256_set_epi32( - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); - - __m256i place = _mm256_set_epi32( - local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], - local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); - - __m256i step = _mm256_set_epi32( - local_vince[3], local_vince[2], local_vince[1], local_vince[0], - local_vince[3], local_vince[2], local_vince[1], local_vince[0]); - step = _mm256_add_epi32(step, step); - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - else - { - AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = 
vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - 
__m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } -#endif }; class Mvlinec1RGBACommand : public DrawerCommand @@ -2980,7 +2450,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -3018,93 +2487,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = 
place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - } -#endif }; class Tmvline1AddRGBACommand : public DrawerCommand @@ -4254,7 +3636,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMasked_rgba() @@ -4304,7 +3693,14 @@ DWORD vlinec1_rgba() void vlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -4317,7 +3713,14 @@ DWORD 
mvlinec1_rgba() void mvlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f051b4cb..174478162 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -424,59 +424,124 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -// Calculate constants for a simple shade -#define AVX2_SHADE_SIMPLE_INIT(light) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); +// Calculate constants for a simple shade with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Calculate constants for a simple shade with different light levels for each pixel -#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); +// Calculate constants for a simple shade with different light levels for each pixel and gamma correction +#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 
* (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Simple shade 8 pixels -#define AVX2_SHADE_SIMPLE(fg) { \ - __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ - __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ - fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ - fg_hi = _mm256_srli_epi16(fg_hi, 8); \ - fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ - fg_lo = _mm256_srli_epi16(fg_lo, 8); \ - fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +// Simple shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE(fg) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ } -// Calculate constants for a complex shade -#define AVX2_SHADE_INIT(light, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ - __m256i color = 
_mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with gamma correction +#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); -// Calculate constants for a complex shade with different light levels for each pixel -#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ - __m256i color = _mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, 
shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with different light levels for each pixel and gamma correction +#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f), 
shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); +// Complex shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE(fg, shade_constants) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + \ + __m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \ + __m128 intensity_hi1 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \ + intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 
intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \ + \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, color); \ + \ + __m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \ + __m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \ + intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \ + \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, color); \ + \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ +} + +/* // Complex shade 8 pixels #define AVX2_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ @@ -499,7 +564,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) \ fg = _mm256_packus_epi16(fg_lo, fg_hi); \ } - +*/ diff --git a/src/r_draw_rgba_sse.h 
b/src/r_draw_rgba_sse.h new file mode 100644 index 000000000..14ebbbb41 --- /dev/null +++ b/src/r_draw_rgba_sse.h @@ -0,0 +1,491 @@ +// +// SSE/AVX intrinsics based drawers for the r_draw family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_draw_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used. + +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(DrawSpanRGBA) : public DrawerCommand +{ + const uint32_t * RESTRICT _source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE * RESTRICT _destorg; + fixed_t _light; + ShadeConstants _shade_constants; + +public: + VecCommand(DrawSpanRGBA)() + { + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) 
+ return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class VecCommand(Vlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Vlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = 
local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Mvlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Mvlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) 
override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + 
DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + } +}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 269dd9d9d..4da963430 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,6 +51,39 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +EXTERN_CVAR(Bool, r_linearlight) + +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_drawt_rgba_sse.h" + +// Generate AVX drawers: +#undef VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define 
VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_drawt_rgba_sse.h" + +#endif + ///////////////////////////////////////////////////////////////////////////// class RtCopy1colRGBACommand : public DrawerCommand @@ -206,7 +239,6 @@ public: _colormap = dc_colormap; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -253,132 +285,6 @@ public: dest += pitch * 2; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = 
colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - } -#endif }; class RtTranslate1colRGBACommand : public DrawerCommand @@ -607,7 +513,6 @@ public: _destalpha = dc_destalpha; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -655,107 +560,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) 
override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - 
_mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtShaded1colRGBACommand : public DrawerCommand @@ -853,7 +657,6 @@ public: _light = dc_light; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { BYTE *colormap; @@ -898,57 +701,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } -#endif }; class RtAddClamp1colRGBACommand : public DrawerCommand @@ -1051,7 +803,6 @@ public: _shade_constants = dc_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -1097,106 +848,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i 
mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtSubClamp1colRGBACommand : public DrawerCommand @@ -1657,7 +1308,14 @@ void rt_map1col_rgba (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) @@ -1693,7 +1351,14 @@ void rt_add1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -1719,7 +1384,14 @@ void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Adds one span at hx to the screen at sx with clamping. @@ -1731,7 +1403,14 @@ void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h new file mode 100644 index 000000000..5b8ae8081 --- /dev/null +++ b/src/r_drawt_rgba_sse.h @@ -0,0 +1,495 @@ +// +// SSE/AVX intrinsics based drawers for the r_drawt family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_drawt_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used. 
+ +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(RtMap4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _destorg; + int _pitch; + BYTE * RESTRICT _colormap; + +public: + VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = thread->num_cores * 4; + + BYTE *colormap = _colormap; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], 
palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + } +}; + +class VecCommand(RtAdd4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + 
ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + fixed_t _srcalpha; + fixed_t _destalpha; + +public: + VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = _colormap; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtShaded4colsRGBA) : public DrawerCommand +{ + int sx; + 
int yl; + int yh; + lighttable_t * RESTRICT _colormap; + int _color; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + +public: + VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + } + + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = 
_mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } +}; + +class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + 
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 
sincr; + dest += pitch; + } while (--count); + } + } +}; From 38aba81dcc816ce9bb0888f95b94f73714771f67 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:11:41 +0200 Subject: [PATCH 60/94] Added more SSE drawers --- src/r_draw_rgba.cpp | 313 ++++++++++++---------- src/r_draw_rgba.h | 58 +++- src/r_draw_rgba_sse.h | 583 ++++++++++++++++++++++++++++++++++++++++- src/r_drawt_rgba.cpp | 14 + src/r_drawt_rgba_sse.h | 252 ++++++++++++++++++ 5 files changed, 1072 insertions(+), 148 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 28c5df2ac..96232ab0c 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2411,10 +2411,7 @@ public: do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_bgra(pix, light, shade_constants); - } + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); frac += fracstep; dest += pitch; } while (--count); @@ -2480,10 +2477,10 @@ public: do { uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; 
dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2535,29 +2532,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | 
(green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2615,8 +2614,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2632,23 +2631,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ 
-2702,29 +2703,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2782,8 +2785,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t 
dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2799,23 +2802,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -2869,29 +2874,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = 
_destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2949,8 +2956,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], 
vince[3] }; @@ -2966,23 +2973,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3036,29 +3045,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); 
do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -3116,8 +3127,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -3133,23 +3144,25 
@@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3733,7 +3746,14 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3746,7 +3766,14 @@ fixed_t tmvline1_addclamp_rgba() void 
tmvline4_addclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3759,7 +3786,14 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3772,7 +3806,14 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 174478162..66be1f38b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -417,9 +417,9 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = bg & 0xff; - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); + uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); + uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -543,7 +543,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) /* // Complex shade 8 pixels -#define AVX2_SHADE(fg, shade_constants) { \ +#define AVX_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ __m256i fg_lo = _mm256_unpacklo_epi8(fg, 
_mm256_setzero_si256()); \ \ @@ -566,8 +566,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } */ +// Normal premultiplied alpha blend using the alpha from fg +#define VEC_ALPHA_BLEND(fg,bg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ + inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ + bg_hi = _mm_srli_epi16(bg_hi, 8); \ + bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ + bg_lo = _mm_srli_epi16(bg_lo, 8); \ + bg = _mm_packus_epi16(bg_lo, bg_hi); \ + fg = _mm_adds_epu8(fg, bg); \ +} +/* +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = src_alpha; + bg_alpha = dest_alpha; +} +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = msrc_alpha; \ + __m128i fg_alpha_lo = msrc_alpha; \ + __m128i bg_alpha_hi = mdest_alpha; \ + __m128i bg_alpha_lo = mdest_alpha; +*/ + +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = (fg >> 24) & 0xff; + fg_alpha += fg_alpha >> 7; + bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; + fg_alpha = (src_alpha * fg_alpha) >> 8; +} 
+ +// Calculates the final alpha values to be used when combined with the source texture alpha channel +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ + fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ + __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ + __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ + fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ + fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 14ebbbb41..0597580e1 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -444,17 +444,16 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + 
_mm_storeu_si128((__m128i*)dest, fg); dest += pitch; } while (--count); } @@ -473,17 +472,585 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = 
calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = 
_mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddClampRGBA)() + { + _dest = dc_dest; + _count = 
dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + 
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), 
_mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4SubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] 
* skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + 
local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4RevSubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = 
_pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i 
out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); dest += pitch; } while (--count); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4da963430..1e1236f0e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1436,7 +1436,14 @@ void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at 
sx with clamping. void rt_subclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1462,7 +1469,14 @@ void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 5b8ae8081..684be2b6a 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -493,3 +493,255 @@ public: } } }; + +class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 
* thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; 
+ + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = 
(uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; From e72a032a114c7710112534157abad8fa300c2f7d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:12:10 +0200 Subject: [PATCH 61/94] Fixed alpha channel issue with textures --- src/textures/pngtexture.cpp | 10 ++++++++-- src/textures/texture.cpp | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 206797a34..408cf1e2f 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -724,8 +724,9 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { + // output as premultiplied alpha uint32_t alpha = in[1]; - uint32_t gray = in[0]; + uint32_t gray = (in[0] * alpha + 127) / 255; *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; in += pitch; } @@ -740,7 +741,12 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { - *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + // output as premultiplied alpha + uint32_t alpha = in[3]; + uint32_t red = (in[0] * alpha + 127) / 255; + uint32_t green = (in[1] * alpha + 127) / 255; 
+ uint32_t blue = (in[2] * alpha + 127) / 255; + *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; in += pitch; } in -= backstep; diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 0030719cb..da5dd8ad7 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,7 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; } } return PixelsBgra.data(); From d3bc68a160be4b6549f68454b308ad66c62e1d50 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:37:22 +0200 Subject: [PATCH 62/94] Disabled the AVX intrinsics --- src/r_draw_rgba.cpp | 40 +++++++++------------------------------- src/r_drawt_rgba.cpp | 36 ++++++++---------------------------- 2 files changed, 17 insertions(+), 59 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 96232ab0c..b437fbe00 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,6 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_linearlight, false, 0) #ifndef NO_SSE @@ -71,7 +70,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_draw_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -88,7 +87,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_draw_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -3652,10 +3651,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif } @@ 
-3709,10 +3705,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3729,10 +3722,7 @@ void mvlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3749,10 +3739,7 @@ void tmvline4_add_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3769,10 +3756,7 @@ void tmvline4_addclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3789,10 +3773,7 @@ void tmvline4_subclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3809,10 +3790,7 @@ void tmvline4_revsubclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 1e1236f0e..e239674e8 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,8 +51,6 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; -EXTERN_CVAR(Bool, r_linearlight) - #ifndef NO_SSE // Generate SSE drawers: @@ -64,7 +62,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_drawt_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -81,7 +79,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_drawt_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -1311,10 +1309,7 @@ void rt_map4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1354,10 +1349,7 @@ void rt_add4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1387,10 +1379,7 @@ void rt_shaded4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1406,10 +1395,7 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + 
DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1439,10 +1425,7 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1472,10 +1455,7 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } From 6daeb5a15881c2198af31cb564c23d6090f026d4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 02:36:54 +0200 Subject: [PATCH 63/94] Blend mode fixes --- src/r_draw_rgba.cpp | 32 +++++++-------- src/r_draw_rgba.h | 87 ++++++++++++++++++---------------------- src/r_draw_rgba_sse.h | 48 ++++++++-------------- src/textures/texture.cpp | 5 ++- 4 files changed, 76 insertions(+), 96 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b437fbe00..f317a34d6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2538,8 +2538,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2631,8 +2631,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2709,8 +2709,8 
@@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2802,8 +2802,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2880,8 +2880,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2973,8 +2973,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3051,8 +3051,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3144,8 +3144,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + 
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 66be1f38b..2527e84a6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -339,6 +339,7 @@ FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; @@ -367,11 +368,12 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) { + uint32_t alpha = color & 0xff000000; uint32_t red = (color >> 16) & 0xff; uint32_t green = (color >> 8) & 0xff; uint32_t blue = color & 0xff; @@ -400,12 +402,12 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) { - uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_alpha = fg >> 24; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -468,11 +470,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = _mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -488,11 +490,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = _mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -585,39 +587,30 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) fg = _mm_adds_epu8(fg, bg); \ } -/* -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { - fg_alpha = src_alpha; - bg_alpha = dest_alpha; + uint32_t alpha = fg >> 24; + alpha += alpha >> 7; + return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; } -#define VEC_CALC_BLEND_ALPHA(fg, 
msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = msrc_alpha; \ - __m128i fg_alpha_lo = msrc_alpha; \ - __m128i bg_alpha_hi = mdest_alpha; \ - __m128i bg_alpha_lo = mdest_alpha; -*/ +#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); // Calculates the final alpha values to be used when combined with the source texture alpha channel -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) -{ - fg_alpha = (fg >> 24) & 0xff; - fg_alpha += fg_alpha >> 7; - bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; - fg_alpha = (src_alpha * fg_alpha) >> 8; -} - -// Calculates the final alpha values to be used when combined with the source texture alpha channel -#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ - fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ - __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ - __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ - fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ - fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); +#define VEC_CALC_BLEND_ALPHA(fg) \ + __m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \ + __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), 
_MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ + bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ \ + fg_alpha_hi = msrc_alpha; \ + fg_alpha_lo = msrc_alpha; \ + } // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ @@ -645,11 +638,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ __m128i mlight_lo = mlight_hi; \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, 
_mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = fade_amount_hi; \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ @@ -659,11 +652,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 0597580e1..220638c75 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -554,9 +554,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = 
_mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -572,7 +570,7 @@ public: __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -593,9 +591,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -610,7 +606,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -697,9 +693,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -714,7 +708,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -735,9 +729,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -752,7 +744,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = 
_mm_loadu_si128((const __m128i*)dest); @@ -839,9 +831,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -856,7 +846,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -877,9 +867,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -894,7 +882,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -981,9 +969,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -998,7 +984,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -1019,9 +1005,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + 
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -1036,7 +1020,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index da5dd8ad7..16a9e63a6 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,10 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + if (indices[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + else + PixelsBgra[i] = 0; } } return PixelsBgra.data(); From c70aa1fe99657e053e3b0aa1a9d00b307ab54bca Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 08:24:02 +0200 Subject: [PATCH 64/94] Added bilinear filtering --- src/r_draw.cpp | 4 ++ src/r_draw.h | 36 ++++++++++ src/r_draw_rgba.cpp | 158 ++++++++++++++++++++++++++++++++------------ src/r_draw_rgba.h | 52 +++++++++++++++ src/r_segs.cpp | 54 +++++++++------ 5 files changed, 242 insertions(+), 62 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7829e2b77..83c4ac8d4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -162,6 +162,8 @@ fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; +const BYTE* dc_source2; +uint32_t dc_texturefracx; BYTE* dc_dest; int dc_count; @@ -171,6 +173,8 @@ DWORD vince[4]; BYTE* palookupoffse[4]; fixed_t palookuplight[4]; const BYTE* bufplce[4]; +const BYTE* bufplce2[4]; +uint32_t buftexturefracx[4]; // just for profiling int dccount; diff --git a/src/r_draw.h b/src/r_draw.h index 99ee4d10d..d5ecbd289 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -71,6 +71,8 @@ extern "C" fixed_t 
dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern "C" const BYTE* dc_source2; +extern "C" uint32_t dc_texturefracx; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -80,6 +82,8 @@ extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; +extern "C" const BYTE* bufplce2[4]; +extern "C" uint32_t buftexturefracx[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -374,4 +378,36 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +extern bool r_swtruecolor; +EXTERN_CVAR(Bool, r_bilinear); + +// Texture sampler state needed for bilinear filtering +struct SamplerSetup +{ + SamplerSetup() { } + SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; + source = getcol(texture, tx); + source2 = getcol(texture, tx + 1); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index f317a34d6..d85d9994b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_bilinear, false, 0) #ifndef NO_SSE @@ -1547,41 +1548,72 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants 
= _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + // 64x64 is the most common case by far, so special case it. - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + // 64x64 is the most common case by far, so special case it. - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2253,6 +2285,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int vlinebits; int _pitch; @@ -2266,6 +2300,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; vlinebits = ::vlinebits; _pitch = dc_pitch; @@ -2282,6 +2318,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; int pitch = _pitch * thread->num_cores; @@ -2289,12 +2327,24 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + *dest = shade_bgra(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2308,7 +2358,9 @@ class Vlinec4RGBACommand : public 
DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Vlinec4RGBACommand() @@ -2323,7 +2375,9 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce[i] = (const uint32_t *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2354,14 +2408,28 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + { + dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, 
shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3651,7 +3719,10 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif } @@ -3705,7 +3776,10 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 2527e84a6..a266ce878 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -426,6 +426,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) +{ + uint32_t half = 1 << (ybits - 1); + uint32_t y = (texturefracy - half) >> ybits; + + uint32_t p00 = col0[y]; + uint32_t p01 = col0[y + 1]; + uint32_t p10 = col1[y]; + uint32_t p11 = col1[y + 1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = 
(RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green 
<< 8) | blue; +} + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f9..d71487bb9 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -58,6 +58,8 @@ CVAR(Bool, r_np2, true, 0) +EXTERN_CVAR(Bool, r_bilinear) + //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1066,14 +1068,16 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two { int count = y2 - y1; - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1097,7 +1101,9 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1115,7 +1121,7 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed @@ -1123,7 +1129,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int count = y2 - y1; for (int i = 0; i < 4; i++) { - bufplce[i] = source[i]; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; @@ -1139,7 +1147,11 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste { dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + } uint32_t left = y2 - y1; while (left > 0) @@ -1249,12 +1261,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } // The aligned columns @@ -1264,10 +1275,6 @@ void wallscan_any( int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - float lights[4]; for (int i = 0; i < 4; i++) { @@ -1276,8 +1283,16 @@ void wallscan_any( } uint32_t uv_pos[4], uv_step[4]; + int magnifying = 0; for (int i = 0; i < 4; i++) + { calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + magnifying |= uv_step[i] >> (fracbits - 1); + } + + SamplerSetup 
sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,7 +1320,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } continue; } @@ -1317,7 +1332,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1337,7 +1352,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1346,7 +1361,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } } @@ -1361,12 +1376,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + 
SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } NetUpdate (); From c1b5ba5b9064997cbe9802f1b5df59a88231d4e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 06:22:43 +0200 Subject: [PATCH 65/94] Added SSE versions of bilinear filtering --- src/r_draw_rgba.cpp | 163 +++++++---- src/r_draw_rgba.h | 82 ++++++ src/r_draw_rgba_sse.h | 657 +++++++++++++++++++++++++++--------------- 3 files changed, 611 insertions(+), 291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d85d9994b..869edaba1 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, false, 0) +CVAR(Bool, r_bilinear, true, 0) #ifndef NO_SSE @@ -1680,43 +1680,70 @@ public: xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - uint32_t texdata; + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) + { + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint32_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2439,6 +2466,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int mvlinebits; int _pitch; @@ -2452,6 +2481,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; mvlinebits = ::mvlinebits; _pitch = dc_pitch; @@ -2468,6 +2499,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; int pitch = _pitch * thread->num_cores; @@ -2475,13 +2508,25 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + uint32_t pix = source[frac >> bits]; + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); + frac += 
fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2496,6 +2541,8 @@ class Mvlinec4RGBACommand : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Mvlinec4RGBACommand() @@ -2511,6 +2558,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2541,15 +2590,29 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = 
alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + { + dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3719,10 +3782,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif } @@ -3776,10 +3836,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a266ce878..0900e8997 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -478,6 +478,88 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d return 
(alpha << 24) | (red << 16) | (green << 8) | blue; } +#ifndef NO_SSE +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) +{ + uint32_t half = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t y = (texturefracy[i] - half) >> ybits; + + uint32_t inv_b = texturefracx[i]; + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + } + return fg; +} + +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + 
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + + xfrac += xstep; + yfrac += ystep; + } + return fg; +} +#endif + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 220638c75..721471724 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -71,195 +71,284 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - 
FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
- dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. - dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. 
- while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + int sse_count = count / 4; + count -= sse_count * 4; - while (sse_count--) + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); } } }; @@ -275,6 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Vlinec4RGBA)() @@ -290,6 +381,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -319,57 +412,97 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + 
VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); 
+ do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; @@ -385,6 +518,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Mvlinec4RGBA)() @@ -400,6 +535,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -429,61 +566,105 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; - 
local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = 
sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; From d15af1524cebd3e000bbd7971d9b5e51205cfde6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 09:38:47 +0200 Subject: [PATCH 66/94] Added mipmap support for floor and ceiling --- src/r_draw.cpp | 6 +-- src/r_draw.h | 2 +- src/r_draw_rgba.cpp | 119 +++++++++++++++++++++++++++++++++++++----- 
src/r_draw_rgba.h | 33 ++++++++++++ src/r_draw_rgba_sse.h | 9 ++-- src/r_plane.cpp | 5 +- src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 2 +- 8 files changed, 150 insertions(+), 27 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83c4ac8d4..73ddb72f8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1062,13 +1062,13 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; // //========================================================================== -void R_SetSpanSource(const BYTE *pixels) +void R_SetSpanSource(FTexture *tex) { - ds_source = pixels; + R_SetMipmappedSpanSource(tex); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { - R_SetSpanSource_ASM(pixels); + R_SetSpanSource_ASM(ds_source); } #endif } diff --git a/src/r_draw.h b/src/r_draw.h index d5ecbd289..b662ddcee 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -122,7 +122,7 @@ extern void (*R_DrawTranslatedColumn)(void); extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); -void R_SetSpanSource(const BYTE *pixels); +void R_SetSpanSource(FTexture *tex); // Span drawing for masked textures. 
extern void (*R_DrawSpanMasked)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 869edaba1..9cdcdbf80 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -59,6 +59,7 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) CVAR(Bool, r_bilinear, true, 0) +CVAR(Bool, r_mipmap, true, 0) #ifndef NO_SSE @@ -1502,6 +1503,7 @@ class DrawSpanRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: DrawSpanRGBACommand() @@ -1519,6 +1521,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1548,12 +1551,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { @@ -1634,6 +1632,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t _ystep; int _xbits; int _ybits; + bool _magnifying; public: DrawSpanMaskedRGBACommand() @@ -1651,6 +1650,7 @@ public: _ystep = ds_ystep; _xbits = ds_xbits; _ybits = ds_ybits; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1680,12 +1680,7 @@ public: xstep = _xstep; ystep = _ystep; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && 
_ybits == 6) { @@ -3677,6 +3672,106 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +#include + +class MipmappedTexture +{ +public: + MipmappedTexture(FTexture *texture) + { + const uint32_t *base_texture = texture->GetPixelsBgra(); + Width = texture->GetWidth(); + Height = texture->GetHeight(); + Levels = MAX(texture->WidthBits, texture->HeightBits); + + // I bet there is a better way to calculate this.. + int buffersize = 0; + for (int i = 0; i < Levels; i++) + { + int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks + int h = MAX(Height >> i, 2); + buffersize += w * h; + } + Pixels.resize(buffersize); + + // Base level: + memcpy(Pixels.data(), base_texture, Width * Height * 4); + + // Mipmap levels: + uint32_t *src = Pixels.data(); + uint32_t *dest = src + Width * Height; + for (int i = 1; i < Levels; i++) + { + int srch = MAX(Height >> (i - 1), 2); + int w = MAX(Width >> i, 2); + int h = MAX(Height >> i, 2); + + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + uint32_t src00 = src[y * 2 + x * 2 * srch]; + uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; + uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; + uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } + } + + int Width = 0; + int Height = 0; + int Levels = 0; + std::vector Pixels; +}; + +class TextureMipmapper +{ +public: + static std::map> &Textures() + { + static std::map> textures; + return 
textures; + } +}; + +void R_SetMipmappedSpanSource(FTexture *tex) +{ + if (r_swtruecolor) + { + if (r_mipmap) + { + auto &mipmap = TextureMipmapper::Textures()[tex]; + if (!mipmap) + mipmap = std::make_shared(tex); + ds_source = (const BYTE*)mipmap->Pixels.data(); + } + else + { + ds_source = (const BYTE*)tex->GetPixelsBgra(); + } + } + else + { + ds_source = tex->GetPixels(); + } +} + +void R_ClearMipmapCache() +{ + TextureMipmapper::Textures().clear(); +} + void R_BeginDrawerCommands() { DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0900e8997..37dc1a70a 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,6 +108,9 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); +void R_SetMipmappedSpanSource(FTexture *tex); +void R_ClearMipmapCache(); + ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -185,6 +188,7 @@ public: }; EXTERN_CVAR(Bool, r_multithreaded) +EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue @@ -426,6 +430,35 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +{ + if (!r_bilinear) + return false; + + // Is this a magfilter or minfilter? 
+ fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + if (magnitude >> FRACBITS == 0) + return false; + + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; +} + FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) { uint32_t half = 1 << (ybits - 1); diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 721471724..4002a5535 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,6 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: VecCommand(DrawSpanRGBA)() @@ -42,6 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -71,12 +73,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77..6913db918 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1178,10 +1178,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale 
* tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + R_SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c81d2a110..c1e2d4bd0 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,6 +170,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad047..fd12a1587 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1404,7 +1404,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); + R_SetSpanSource(tex); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); From c235de5c22f6f7aebbf36aa3f80a45e0f5f6accf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 21:55:08 +0200 Subject: [PATCH 67/94] Native mipmap support to FTexture --- src/r_draw.cpp | 2 +- src/r_draw_rgba.cpp | 100 ---------------------- src/r_draw_rgba.h | 157 +++++++++++++++++------------------ src/r_draw_rgba_sse.h | 28 +++++-- src/r_swrenderer.cpp | 1 - src/textures/jpegtexture.cpp | 4 +- src/textures/pngtexture.cpp | 3 +- src/textures/texture.cpp | 68 ++++++++++++++- src/textures/textures.h | 4 + 9 files changed, 171 insertions(+), 196 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 73ddb72f8..55353a006 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1064,7 +1064,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { - R_SetMipmappedSpanSource(tex); + ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9cdcdbf80..8144c096d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3672,106 +3672,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -#include - -class MipmappedTexture -{ -public: - MipmappedTexture(FTexture *texture) - { - const uint32_t *base_texture = texture->GetPixelsBgra(); - Width = texture->GetWidth(); - Height = texture->GetHeight(); - Levels = MAX(texture->WidthBits, texture->HeightBits); - - // I bet there is a better way to calculate this.. 
- int buffersize = 0; - for (int i = 0; i < Levels; i++) - { - int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks - int h = MAX(Height >> i, 2); - buffersize += w * h; - } - Pixels.resize(buffersize); - - // Base level: - memcpy(Pixels.data(), base_texture, Width * Height * 4); - - // Mipmap levels: - uint32_t *src = Pixels.data(); - uint32_t *dest = src + Width * Height; - for (int i = 1; i < Levels; i++) - { - int srch = MAX(Height >> (i - 1), 2); - int w = MAX(Width >> i, 2); - int h = MAX(Height >> i, 2); - - for (int x = 0; x < w; x++) - { - for (int y = 0; y < h; y++) - { - uint32_t src00 = src[y * 2 + x * 2 * srch]; - uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; - uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; - uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; - - uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; - uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; - uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; - uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; - - dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; - } - } - - src = dest; - dest += w * h; - } - } - - int Width = 0; - int Height = 0; - int Levels = 0; - std::vector Pixels; -}; - -class TextureMipmapper -{ -public: - static std::map> &Textures() - { - static std::map> textures; - return textures; - } -}; - -void R_SetMipmappedSpanSource(FTexture *tex) -{ - if (r_swtruecolor) - { - if (r_mipmap) - { - auto &mipmap = TextureMipmapper::Textures()[tex]; - if (!mipmap) - mipmap = std::make_shared(tex); - ds_source = (const BYTE*)mipmap->Pixels.data(); - } - else - { - ds_source = (const BYTE*)tex->GetPixelsBgra(); - } - } - else - { - ds_source = tex->GetPixels(); - } -} - -void R_ClearMipmapCache() -{ - TextureMipmapper::Textures().clear(); -} - void R_BeginDrawerCommands() { 
DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 37dc1a70a..4808cb257 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,9 +108,6 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); -void R_SetMipmappedSpanSource(FTexture *tex); -void R_ClearMipmapCache(); - ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -494,9 +491,9 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d uint32_t y = (yfrac - yhalf) >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; @@ -511,87 +508,81 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d return (alpha << 24) | (red << 16) | (green << 8) | blue; } -#ifndef NO_SSE -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) -{ - uint32_t half = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t y = (texturefracy[i] - half) >> ybits; - - uint32_t inv_b = texturefracx[i]; - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, 
invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - } - return fg; +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t half = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t y = (texturefracy[i] - half) >> ybits; \ + \ + uint32_t inv_b = texturefracx[i]; \ + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, 
_mm_setzero_si128()), 12)); \ + } \ } -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) -{ - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; - - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; - - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - - xfrac += xstep; - yfrac += ystep; - } - return fg; +#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ + 
int xshift = (32 - xbits); \ + int yshift = (32 - ybits); \ + int xmask = (1 << xshift) - 1; \ + int ymask = (1 << yshift) - 1; \ + uint32_t xhalf = 1 << (xbits - 1); \ + uint32_t yhalf = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t x = (xfrac - xhalf) >> xbits; \ + uint32_t y = (yfrac - yhalf) >> ybits; \ + \ + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ + \ + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \ + \ + xfrac += xstep; \ + yfrac += ystep; \ + } \ } -#endif // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 4002a5535..af761c6e7 100644 --- 
a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -280,7 +280,8 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -291,7 +292,8 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -318,7 +320,10 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -329,7 +334,10 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -471,7 +479,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -488,7 +497,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -629,7 +639,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -648,7 +659,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c1e2d4bd0..c81d2a110 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,7 +170,6 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } - R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 3b5359846..f44b34d08 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -474,7 +474,7 @@ void FJPEGTexture::MakeTextureBgra() jpeg_decompress_struct cinfo; jpeg_error_mgr jerr; - PixelsBgra.resize(Width * Height, 0xffba0000); + CreatePixelsBgraWithMipmaps(); cinfo.err = jpeg_std_error(&jerr); cinfo.err->output_message = JPEG_OutputMessage; @@ -560,6 +560,8 @@ void FJPEGTexture::MakeTextureBgra() { delete[] buff; } + + GenerateBgraMipmaps(); } diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 408cf1e2f..ee4eabe90 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -633,7 +633,7 @@ void FPNGTexture::MakeTextureBgra () lump = new FileReader(SourceFile.GetChars()); } - PixelsBgra.resize(Width * Height, 0xffff0000); + CreatePixelsBgraWithMipmaps(); if (StartOfIDAT != 0) { DWORD len, id; @@ -757,6 +757,7 @@ void FPNGTexture::MakeTextureBgra () } } delete lump; + GenerateBgraMipmaps(); } //=========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 16a9e63a6..f5e4d4aa8 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -200,7 +200,7 @@ const uint32_t *FTexture::GetPixelsBgra() const BYTE *indices = GetPixels(); if (indices == nullptr) return nullptr; - PixelsBgra.resize(Width * Height); + CreatePixelsBgraWithMipmaps(); for (int i = 0; i < Width * Height; i++) { if (indices[i] != 0) @@ -208,6 +208,7 @@ const uint32_t *FTexture::GetPixelsBgra() else PixelsBgra[i] = 0; } + GenerateBgraMipmaps(); } return PixelsBgra.data(); } @@ -355,6 +356,71 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::CreatePixelsBgraWithMipmaps() +{ + int levels = MipmapLevels(); + int buffersize = 0; + for (int i = 0; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + 
buffersize += w * h; + } + PixelsBgra.resize(buffersize, 0xffff0000); +} + +int FTexture::MipmapLevels() const +{ + int widthbits = 0; + while ((Width >> widthbits) != 0) widthbits++; + + int heightbits = 0; + while ((Height >> heightbits) != 0) heightbits++; + + return MAX(widthbits, heightbits); +} + +void FTexture::GenerateBgraMipmaps() +{ + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width * Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = MIN((y + 1) * 2, srch - 1); + + uint32_t src00 = src[sy0 + sx0 * srch]; + uint32_t src01 = src[sy1 + sx0 * srch]; + uint32_t src10 = src[sy0 + sx1 * srch]; + uint32_t src11 = src[sy1 + sx1 * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } +} + void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation) { const BYTE *pixels = GetPixels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index 3b4b0b8b3..ab9dc3719 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -271,6 +271,10 @@ protected: std::vector PixelsBgra; + void CreatePixelsBgraWithMipmaps(); + void GenerateBgraMipmaps(); + int MipmapLevels() const; + public: static void FlipSquareBlock (BYTE *block, int x, int 
y); static void FlipSquareBlockBgra (uint32_t *block, int x, int y); From 4142b6ed1b1dc858a4a7ab16ca2f01a79567ce3b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:03:34 +0200 Subject: [PATCH 68/94] GCC compile fix --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 4808cb257..617e83107 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -427,7 +427,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { if (!r_bilinear) return false; From f81042b3e20a2d9d300c0431d0bb094435eef340 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:10:04 +0200 Subject: [PATCH 69/94] Fix warning generated by gcc --- src/r_draw_rgba.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8144c096d..57b32b28c 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1722,7 +1722,8 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); @@ -1734,7 +1735,8 @@ public: int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); From e294906d692e4eee921d35e013fafdd633f42257 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:22:06 +0200 Subject: [PATCH 70/94] Voxel support in true color mode --- src/r_draw.cpp | 13 ++++ src/r_draw.h | 18 +++--- src/r_draw_rgba.cpp | 142 ++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_rgba.h | 3 + src/r_things.cpp | 6 +- src/r_things.h | 2 +- 6 files changed, 171 insertions(+), 13 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 55353a006..8cca13289 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -100,6 +100,8 @@ void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); +void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -2306,6 +2308,9 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_rgba; R_DrawParticle = R_DrawParticle_rgba; + R_SetupDrawSlab = R_SetupDrawSlab_rgba; + R_DrawSlab = R_DrawSlab_rgba; + tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; 
tmvline1_addclamp = tmvline1_addclamp_rgba; @@ -2403,6 +2408,14 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; +#ifdef X86_ASM + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabA; +#else + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabC; +#endif + tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; tmvline1_addclamp = tmvline1_addclamp_C; diff --git a/src/r_draw.h b/src/r_draw.h index b662ddcee..547a044ea 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -286,16 +286,16 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif +extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#ifdef X86_ASM +extern "C" void R_SetupDrawSlabA(const BYTE *colormap); +extern "C" void R_DrawSlabA(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#else +extern "C" void R_SetupDrawSlabC(const BYTE *colormap); +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#endif extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 57b32b28c..9603a8b3e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2303,6 +2303,123 @@ public: } }; +class DrawSlabRGBACommand : public 
DrawerCommand +{ + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const BYTE *_vptr; + uint32_t *_p; + ShadeConstants _shade_constants; + const BYTE *_colormap; + fixed_t _light; + int _pitch; + int _start_y; + +public: + DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) + { + _dx = dx; + _v = v; + _dy = dy; + _vi = vi; + _vptr = vptr; + _p = (uint32_t *)p; + _shade_constants = shade_constants; + _colormap = colormap; + _light = light; + _pitch = dc_pitch; + _start_y = static_cast((p - dc_destorg) / (dc_pitch * 4)); + assert(dx > 0); + } + + void Execute(DrawerThread *thread) override + { + int dx = _dx; + fixed_t v = _v; + int dy = _dy; + fixed_t vi = _vi; + const BYTE *vptr = _vptr; + uint32_t *p = _p; + ShadeConstants shade_constants = _shade_constants; + const BYTE *colormap = _colormap; + uint32_t light = calc_light_multiplier(_light); + int pitch = _pitch; + int x; + + dy = thread->count_for_thread(_start_y, dy); + p = thread->dest_for_thread(_start_y, pitch, p); + v += vi * thread->skipped_by_thread(_start_y); + vi *= thread->num_cores; + pitch *= thread->num_cores; + + if (dx == 1) + { + while (dy > 0) + { + *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 2) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 3) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 4) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p[3] = color; + p 
+= pitch; + v += vi; + dy--; + } + } + else while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + // The optimizer will probably turn this into a memset call. + // Since dx is not likely to be large, I'm not sure that's a good thing, + // hence the alternatives above. + for (x = 0; x < dx; x++) + { + p[x] = color; + } + p += pitch; + v += vi; + dy--; + } + } +}; + class Vlinec1RGBACommand : public DrawerCommand { DWORD _iscale; @@ -3813,6 +3930,31 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +static ShadeConstants slab_rgba_shade_constants; +static const BYTE *slab_rgba_colormap; +static fixed_t slab_rgba_light; + +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade) +{ + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); +} + +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +{ + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); +} + //extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_rgba() diff --git 
a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 617e83107..c94cb1e4b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -90,6 +90,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); + void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); DWORD vlinec1_rgba(); diff --git a/src/r_things.cpp b/src/r_things.cpp index f6a1a709f..e1f1017f3 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -688,7 +688,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. 
@@ -2775,7 +2775,7 @@ extern double BaseYaspectMul;; void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; @@ -2812,7 +2812,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); + R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); int pixelsize = r_swtruecolor ? 4 : 1; diff --git a/src/r_things.h b/src/r_things.h index 04d5487ee..13f89574b 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, const FVector3 &sprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); From db4cba239a16662c437da8dc5d03ce3f14dd151c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:27:12 +0200 Subject: [PATCH 71/94] Renamed member variable to make it compile with gcc --- src/r_draw_rgba.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9603a8b3e..1e2678bd3 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2309,7 +2309,7 @@ class DrawSlabRGBACommand : public DrawerCommand fixed_t _v; int _dy; fixed_t _vi; - const 
BYTE *_vptr; + const BYTE *_voxelptr; uint32_t *_p; ShadeConstants _shade_constants; const BYTE *_colormap; @@ -2324,7 +2324,7 @@ public: _v = v; _dy = dy; _vi = vi; - _vptr = vptr; + _voxelptr = vptr; _p = (uint32_t *)p; _shade_constants = shade_constants; _colormap = colormap; @@ -2340,7 +2340,7 @@ public: fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; - const BYTE *vptr = _vptr; + const BYTE *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; From ca9d8e580e4b2b94c19182dcb160d9de6c904b5c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:51:16 +0200 Subject: [PATCH 72/94] Increase command queue memory pool to 16 MB and make it flush if its exhausted --- src/r_draw_rgba.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c94cb1e4b..47f7c8865 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -193,7 +193,7 @@ EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { - enum { memorypool_size = 4 * 1024 * 1024 }; + enum { memorypool_size = 16 * 1024 * 1024 }; char memorypool[memorypool_size]; size_t memorypool_pos = 0; @@ -241,8 +241,13 @@ public: else { void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } T *command = new (ptr)T(std::forward(args)...); queue->commands.push_back(command); } From 7a0c801a18bcf4f1910a1ae5cc708fe746ca2f9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 08:23:16 +0200 Subject: [PATCH 73/94] Added mipmapping to wallscan --- src/r_draw.h | 29 ------- src/r_draw_rgba.h | 2 +- src/r_segs.cpp | 207 +++++++++++++++++++++++++++++----------------- 3 files changed, 130 insertions(+), 108 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 
547a044ea..72304e81f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -381,33 +381,4 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_bilinear); -// Texture sampler state needed for bilinear filtering -struct SamplerSetup -{ - SamplerSetup() { } - SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; -}; - -inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; - source = getcol(texture, tx); - source2 = getcol(texture, tx + 1); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; - } -} - #endif diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47f7c8865..8f97d4ecd 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -443,7 +443,7 @@ inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, in // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); if (magnitude >> FRACBITS == 0) return false; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d71487bb9..84c967d1d 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1067,11 +1067,92 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } +EXTERN_CVAR(Bool, r_mipmap) + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + int32_t uv_fracbits; + uint32_t uv_max; + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + int base_width = texture->GetWidth(); + int base_height = texture->GetHeight(); + uv_fracbits = 32 - texture->HeightBits; + uv_max = base_height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; + v = v - floor(v); + v *= base_height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + + bool magnifying = uv_step >> (uv_fracbits - 1) == 0; + + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int mipmap_offset = 0; + int mip_width = base_width; + int mip_height = base_height; + if (r_mipmap) + { + fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); + int level = magnitude >> FRACBITS; + while (level != 0) + { + if (uv_fracbits > 30) + break; + + mipmap_offset += mip_width * mip_height; + uv_fracbits += 1; + uv_pos >>= 1; + uv_step >>= 1; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two + if (sampler.uv_max == 0) // power of two { int count = y2 - y1; @@ -1080,24 +1161,24 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; draw1column(); - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } else { - uint32_t uv_pos = uv_start; + uint32_t uv_pos = sampler.uv_pos; uint32_t left = y2 - y1; while (left > 0) { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); @@ -1106,25 +1187,25 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; + dc_iscale = sampler.uv_step; dc_texturefrac = uv_pos; draw1column(); left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; } - return uv_pos; + sampler.uv_pos = uv_pos; } } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { int pixelsize 
= r_swtruecolor ? 4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1132,12 +1213,12 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; @@ -1160,9 +1241,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste uint32_t count = left; for (int i = 0; i < 4; i++) { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) next_uv_wrap++; count = MIN(next_uv_wrap, count); } @@ -1170,8 +1251,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Draw until that column wraps for (int i = 0; i < 4; i++) { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; } dc_count = count; draw4columns(); @@ -1179,9 +1260,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Wrap the uv position for (int i = 0; i < 4; i++) { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= 
sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; } left -= count; @@ -1189,22 +1270,6 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste } } -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - typedef DWORD(*Draw1ColumnFuncPtr)(); typedef void(*Draw4ColumnsFuncPtr)(); @@ -1216,15 +1281,12 @@ void wallscan_any( if (rw_pic->UseType == FTexture::TEX_Null) return; - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; + fixed_t xoffset = rw_offset; + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; + setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1261,11 +1323,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + 
wallscan_drawcol1(x, y1, y2, sampler, draw1column); } // The aligned columns @@ -1282,17 +1341,9 @@ void wallscan_any( light += rw_lightstep; } - uint32_t uv_pos[4], uv_step[4]; - int magnifying = 0; + WallscanSampler sampler[4]; for (int i = 0; i < 4; i++) - { - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - magnifying |= uv_step[i] >> (fracbits - 1); - } - - SamplerSetup sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,13 +1356,16 @@ void wallscan_any( // If we got an empty column in our set we cannot draw 4 columns in one go: bool empty_column_in_set = false; + int bilinear_count = 0; for (int i = 0; i < 4; i++) { if (y2[i] <= y1[i]) empty_column_in_set = true; + if (sampler[i].source2) + bilinear_count++; } - if (empty_column_in_set || middle_y2 <= middle_y1) + if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) { for (int i = 0; i < 4; i++) { @@ -1320,7 +1374,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); } continue; } @@ -1332,7 +1386,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1352,7 +1406,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, 
uv_max, sampler, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1361,7 +1415,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); } } @@ -1376,11 +1430,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); } NetUpdate (); From 698b5f3db19dd5b3331cae5a7a234c23192c310a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 11:37:51 +0200 Subject: [PATCH 74/94] Simplify drawer code by creating loop iterators Fixed blending bug --- src/r_draw.cpp | 12 +- src/r_draw.h | 6 +- src/r_draw_rgba.cpp | 3338 +++++++++++----------------------------- src/r_draw_rgba.h | 426 ++--- src/r_draw_rgba_sse.h | 116 +- src/r_drawt_rgba.cpp | 70 +- src/r_drawt_rgba_sse.h | 12 +- src/r_plane.cpp | 4 +- src/r_segs.cpp | 16 +- src/r_things.cpp | 2 +- src/v_draw.cpp | 2 +- 11 files changed, 1235 insertions(+), 2769 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 8cca13289..578ca9646 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1644,6 +1644,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; +uint32_t vlinemax; +uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ 
-1693,11 +1695,12 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits) +void setupvline (int fracbits, int fracmax) { if (r_swtruecolor) { vlinebits = fracbits; + vlinemax = fracmax; return; } @@ -1777,7 +1780,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits) +void setupmvline (int fracbits, int fracmax) { if (!r_swtruecolor) { @@ -1792,6 +1795,7 @@ void setupmvline (int fracbits) else { mvlinebits = fracbits; + mvlinemax = fracmax; } } @@ -1964,10 +1968,12 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; +uint32_t tmvlinemax; -void setuptmvline (int bits) +void setuptmvline (int bits, int fracmax) { tmvlinebits = bits; + tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 72304e81f..bd477efc4 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -100,13 +100,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); extern void (*dovline4) (); -extern void setupvline (int); +extern void setupvline (int,int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int); +extern void setupmvline (int,int); -extern void setuptmvline (int); +extern void setuptmvline (int,int); // The Spectre/Invisibility effect. 
extern void (*R_DrawFuzzColumn)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 1e2678bd3..dc97fdd47 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -51,6 +51,9 @@ extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; +extern uint32_t vlinemax; +extern uint32_t mvlinemax; +extern uint32_t tmvlinemax; extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -261,353 +264,520 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// -class DrawColumnRGBACommand : public DrawerCommand +class DrawerColumnCommand : public DrawerCommand { +public: int _count; BYTE * RESTRICT _dest; - DWORD _texturefrac; - DWORD _iscale; - fixed_t _light; - const BYTE * RESTRICT _source; int _pitch; + DWORD _iscale; + DWORD _texturefrac; + + DrawerColumnCommand() + { + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _pitch = dc_pitch; + } + + class LoopIterator + { + public: + int count; + uint32_t *dest; + int pitch; + fixed_t fracstep; + fixed_t frac; + + LoopIterator(DrawerColumnCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + } + + uint32_t sample_index() + { + return frac >> FRACBITS; + } + + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + frac += fracstep; + return (--count) != 0; + } + }; +}; + +class DrawColumnRGBACommand : public DrawerColumnCommand +{ + uint32_t _light; + const BYTE * RESTRICT _source; ShadeConstants _shade_constants; BYTE * RESTRICT _colormap; public: 
DrawColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _texturefrac = dc_texturefrac; - _iscale = dc_iscale; - _light = dc_light; - _source = dc_source; - _pitch = dc_pitch; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + _source = dc_source; _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. 
- const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - BYTE *colormap = _colormap; - + LoopIterator loop(this, thread); + if (!loop) return; do { - *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } }; -class FillColumnRGBACommand : public DrawerCommand +class FillColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - fixed_t _light; - int _pitch; - int _color; + uint32_t _color; public: FillColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _light = dc_light; - _pitch = dc_pitch; - _color = dc_color; + uint32_t light = LightBgra::calc_light_multiplier(dc_light); + _color = LightBgra::shade_pal_index_simple(dc_color, light); } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - + LoopIterator loop(this, thread); + if (!loop) return; + do { - int pitch = _pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(_color, light); - - do - { - *dest = color; - dest += pitch; - } while (--count); - } + *loop.dest = BlendBgra::copy(_color); + } while (loop.next()); } }; -class FillAddColumnRGBACommand : public DrawerCommand +class FillAddColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; uint32_t _srccolor; public: FillAddColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + 
if (!loop) return; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = fg >> 24; - fg_alpha += fg_alpha >> 7; - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - uint32_t inv_alpha = 256 - fg_alpha; + uint32_t alpha = APART(_srccolor); + alpha += alpha >> 7; do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha); + } while (loop.next()); } }; -class FillAddClampColumnRGBACommand : public DrawerCommand +class FillAddClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillAddClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - 
uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillSubClampColumnRGBACommand : public DrawerCommand +class FillSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = 
_destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillRevSubClampColumnRGBACommand : public DrawerCommand +class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillRevSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; +class DrawAddColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _colormap; - dest = 
thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; +public: + DrawAddColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _colormap = dc_colormap; + } - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; +class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; +public: + DrawTranslatedColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + } - uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = 
LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); +class DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawTlatedAddColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawShadedColumnRGBACommand : public DrawerColumnCommand +{ +private: + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; + uint32_t _color; + +public: + DrawShadedColumnRGBACommand() + { + _source = dc_source; + _colormap = dc_colormap; + _color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light)); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = clamp(_colormap[_source[loop.sample_index()]], 0, 64) * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class DrawAddClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t 
_srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampTranslatedColumnRGBACommand() + { + _translation = dc_translation; + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void 
Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawRevSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + 
+class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawRevSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; @@ -635,19 +805,16 @@ public: void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); - count = thread->count_for_thread(yl, yh - yl + 1); + int count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); + uint32_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; @@ -659,13 +826,10 @@ public: if (yl < fuzzstep) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -694,13 +858,10 @@ public: do { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -714,783 +875,21 @@ public: if (lowerbounds) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } } }; -class DrawAddColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants 
_shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; +///////////////////////////////////////////////////////////////////////////// +class DrawerSpanCommand : public DrawerCommand +{ public: - DrawAddColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class 
DrawTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - -public: - DrawTranslatedColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawTlatedAddColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawTlatedAddColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - 
_pitch = dc_pitch; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawShadedColumnRGBACommand : public DrawerCommand -{ -private: - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - fixed_t _light; - const BYTE * RESTRICT _source; - lighttable_t * RESTRICT _colormap; - int _color; - int _pitch; - -public: - DrawShadedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _light = dc_light; - _source = dc_source; - _colormap = dc_colormap; - 
_color = dc_color; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = _source; - BYTE *colormap = _colormap; - int pitch = _pitch * thread->num_cores; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - 
- count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - 
int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawSubClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void 
Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - 
_shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawRevSubClampColumnRGBACommand() - { - _count = dc_count; - 
_dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * 
RESTRICT _translation; - -public: - DrawRevSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE * RESTRICT translation = _translation; - const BYTE * RESTRICT source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSpanRGBACommand : public DrawerCommand -{ - const uint32_t * RESTRICT _source; 
fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1501,14 +900,17 @@ class DrawSpanRGBACommand : public DrawerCommand int _xbits; int _ybits; BYTE * RESTRICT _destorg; - fixed_t _light; + + const uint32_t * RESTRICT _source; + uint32_t _light; ShadeConstants _shade_constants; bool _magnifying; -public: - DrawSpanRGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerSpanCommand() { - _source = (const uint32_t*)ds_source; _xfrac = ds_xfrac; _yfrac = ds_yfrac; _xstep = ds_xstep; @@ -1519,752 +921,270 @@ public: _xbits = ds_xbits; _ybits = ds_ybits; _destorg = dc_destorg; - _light = ds_light; + + _source = (const uint32_t*)ds_source; + _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } + class LoopIterator + { + public: + uint32_t *dest; + int count; + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE yshift; + BYTE xshift; + int xmask; + bool is_64x64; + bool skipped; + + LoopIterator(DrawerSpanCommand *command, DrawerThread *thread) + { + dest = ylookup[command->_y] + command->_x1 + (uint32_t*)command->_destorg; + count = command->_x2 - command->_x1 + 1; + xfrac = command->_xfrac; + yfrac = command->_yfrac; + xstep = command->_xstep; + ystep = command->_ystep; + yshift = 32 - command->_ybits; + xshift = yshift - command->_xbits; + xmask = ((1 << command->_xbits) - 1) << command->_ybits; + is_64x64 = command->_xbits == 6 && command->_ybits == 6; + skipped = thread->line_skipped_by_thread(command->_y); + } + + // 64x64 is the most common case by far, so special case it. 
+ int spot64() + { + return ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + } + + int spot() + { + return ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + } + + explicit operator bool() + { + return !skipped && count > 0; + } + + bool next() + { + dest++; + xfrac += xstep; + yfrac += ystep; + return (--count) != 0; + } + }; +}; + +class DrawSpanRGBACommand : public DrawerSpanCommand +{ +public: void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + } while (loop.next()); } else { do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + } while (loop.next()); } } } }; -class DrawSpanMaskedRGBACommand : public DrawerCommand +class DrawSpanMaskedRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - bool _magnifying; - public: - DrawSpanMaskedRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* 
dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } } }; -class DrawSpanTranslucentRGBACommand : public DrawerCommand +class DrawSpanTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanTranslucentRGBACommand() - { - _source = (const uint32_t *)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - 
dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red 
* fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand +class DrawSpanMaskedTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedTranslucentRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; 
- ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while 
(--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; -class DrawSpanAddClampRGBACommand : public DrawerCommand +class DrawSpanAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class 
DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +class DrawSpanMaskedAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, 
*loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; @@ -2296,13 +1216,15 @@ public: uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; int count = (_x2 - _x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } }; +///////////////////////////////////////////////////////////////////////////// + class DrawSlabRGBACommand : public DrawerCommand { int _dx; @@ -2344,7 +1266,7 @@ public: uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; @@ -2358,7 +1280,7 @@ public: { while (dy > 0) { - *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + *p = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p += pitch; v += vi; dy--; @@ -2368,7 +1290,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p += pitch; @@ -2380,7 +1302,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p[2] = color; @@ -2393,7 +1315,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; 
p[2] = color; @@ -2405,7 +1327,7 @@ public: } else while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); // The optimizer will probably turn this into a memset call. // Since dx is not likely to be large, I'm not sure that's a good thing, // hence the alternatives above. @@ -2420,1000 +1342,484 @@ public: } }; -class Vlinec1RGBACommand : public DrawerCommand +///////////////////////////////////////////////////////////////////////////// + +class DrawerWall1Command : public DrawerCommand { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; +public: BYTE * RESTRICT _dest; - int vlinebits; int _pitch; - fixed_t _light; + int _count; + DWORD _texturefrac; + uint32_t _texturefracx; + DWORD _iscale; + int _vlinebits; + uint32_t _vlinemax; + + const uint32 * RESTRICT _source; + const uint32 * RESTRICT _source2; + uint32_t _light; ShadeConstants _shade_constants; -public: - Vlinec1RGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall1Command(int vlinebits, uint32_t vlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; _dest = dc_dest; - vlinebits = ::vlinebits; _pitch = dc_pitch; - _light = dc_light; + _count = dc_count; + _texturefrac = dc_texturefrac; + _texturefracx = dc_texturefracx; + _iscale = dc_iscale; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + + _source = (const uint32 *)dc_source; + _source2 = (const uint32 *)dc_source2; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int 
pitch; + int count; + uint32_t fracstep; + uint32_t frac; + uint32_t texturefracx; + int bits; + + LoopIterator(DrawerWall1Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + texturefracx = command->_texturefracx; + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + bits = command->_vlinebits; + pitch = command->_pitch * thread->num_cores; + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index() + { + return frac >> bits; + } + + bool next() + { + frac += fracstep; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class DrawerWall4Command : public DrawerCommand +{ +public: + BYTE * RESTRICT _dest; + int _count; + int _pitch; + int _vlinebits; + uint32_t _vlinemax; + ShadeConstants _shade_constants; + uint32_t _vplce[4]; + uint32_t _vince[4]; + uint32_t _buftexturefracx[4]; + const uint32_t * RESTRICT _bufplce[4]; + const uint32_t * RESTRICT _bufplce2[4]; + uint32_t _light[4]; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall4Command(int vlinebits, uint32_t vlinemax) + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + _shade_constants = dc_shade_constants; + for (int i = 0; i < 4; i++) + { + _vplce[i] = vplce[i]; + _vince[i] = vince[i]; + _buftexturefracx[i] = buftexturefracx[i]; + _bufplce[i] = (const uint32_t *)bufplce[i]; + _bufplce2[i] = (const uint32_t *)bufplce2[i]; + _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int pitch; + int count; + int bits; + uint32_t vplce[4]; + uint32_t vince[4]; + + 
LoopIterator(DrawerWall4Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + bits = command->_vlinebits; + + int skipped = thread->skipped_by_thread(command->_dest_y); + for (int i = 0; i < 4; i++) + { + vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; + vince[i] = command->_vince[i] * thread->num_cores; + } + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index(int col) + { + return vplce[col] >> bits; + } + + bool next() + { + vplce[0] += vince[0]; + vplce[1] += vince[1]; + vplce[2] += vince[2]; + vplce[3] += vince[3]; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class Vlinec1RGBACommand : public DrawerWall1Command +{ +public: + Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) + { } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } 
while (loop.next()); } else { do { - *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } } }; -class Vlinec4RGBACommand : public DrawerCommand +class Vlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int vlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32_t * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Vlinec4RGBACommand() + Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32_t *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { 
vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } else { do { - dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } } }; -class Mvlinec1RGBACommand : public DrawerCommand +class Mvlinec1RGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; - BYTE * RESTRICT _dest; - int mvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - public: - Mvlinec1RGBACommand() + Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; - _dest = dc_dest; - mvlinebits = ::mvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = mvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, 
_shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } }; -class Mvlinec4RGBACommand : public DrawerCommand +class Mvlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int mvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Mvlinec4RGBACommand() + Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = 
calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } else { do { - dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, 
shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } } }; -class Tmvline1AddRGBACommand : public DrawerCommand +class Tmvline1AddRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddRGBACommand() + Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - 
uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddRGBACommand : public DrawerCommand +class Tmvline4AddRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - public: - Tmvline4AddRGBACommand() + Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread 
*thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + 
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1AddClampRGBACommand : public DrawerCommand +class Tmvline1AddClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddClampRGBACommand() + Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = 
(*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddClampRGBACommand : public DrawerCommand +class Tmvline4AddClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4AddClampRGBACommand() + Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = 
calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1SubClampRGBACommand : public DrawerCommand +class Tmvline1SubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT 
_dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1SubClampRGBACommand() + Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 
256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4SubClampRGBACommand : public DrawerCommand +class Tmvline4SubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4SubClampRGBACommand() + Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD 
local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1RevSubClampRGBACommand : public DrawerCommand +class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1RevSubClampRGBACommand() + Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { 
- _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = 
BlendBgra::revsub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4RevSubClampRGBACommand : public DrawerCommand +class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4RevSubClampRGBACommand() + Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator 
loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; +///////////////////////////////////////////////////////////////////////////// + class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; @@ -3446,7 +1852,7 @@ public: uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants constants = _shade_constants; do @@ -3563,8 +1969,8 @@ public: uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for 
(int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f97d4ecd..20fff4fc0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,7 +286,7 @@ public: }; ///////////////////////////////////////////////////////////////////////////// -// Pixel shading macros and inline functions: +// Pixel shading inline functions: // Give the compiler a strong hint we want these functions inlined: #ifndef FORCEINLINE @@ -310,220 +310,256 @@ public: #endif #endif -// calculates the light constant passed to the shade_pal_index function -FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +class LightBgra { - return 256 - (light >> (FRACBITS - 8)); -} - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t alpha = color.d & 0xff000000; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) +public: + // calculates the light constant passed to the shade_pal_index function + FORCEINLINE 
static uint32_t calc_light_multiplier(dsfixed_t light) { + return 256 - (light >> (FRACBITS - 8)); + } + + // Calculates a ARGB8 color for the given palette index and light multiplier + FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + { + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap + FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t alpha = color & 0xff000000; - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - 
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = fg >> 24; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); - uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); - uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) -{ - if (!r_bilinear) - return false; - - // Is this a magfilter or minfilter? 
- fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); - fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); - if (magnitude >> FRACBITS == 0) - return false; - - if (r_mipmap) - { - int level = magnitude >> (FRACBITS + 1); - while (level != 0) + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level >>= 1; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; } - return true; -} -FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) + FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) + { + uint32_t red = RPART(color) * light / 256; + uint32_t green = GPART(color) * light / 256; + uint32_t blue = BPART(color) * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t 
shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) + { + uint32_t alpha = color & 0xff000000; + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; + } +}; + +class BlendBgra { - uint32_t half = 1 << (ybits - 1); - uint32_t y = (texturefracy - half) >> ybits; +public: + FORCEINLINE static uint32_t copy(uint32_t fg) + { + return fg; + } - uint32_t p00 = col0[y]; - uint32_t p01 = col0[y + 1]; - uint32_t p10 = col1[y]; - uint32_t p11 = col1[y + 1]; + FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = MIN((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255); + uint32_t green = MIN((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255); + uint32_t blue = MIN((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + FORCEINLINE static uint32_t 
sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg) + { + uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } +}; 
-FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +class SampleBgra { - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; +public: + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + { + if (!r_bilinear) + return false; - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + // Is this a magfilter or minfilter? + fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); + if (magnitude >> FRACBITS == 0) + return false; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + source 
+= (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + { + uint32_t half = 1 << (ybits - 1); + uint32_t y0 = (texturefracy - half) >> ybits; + if (y0 > ymax) + y0 = 0; + uint32_t y1 = y0 + 1; + if (y1 > ymax) + y1 = 0; -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t p00 = col0[y0]; + uint32_t p01 = col0[y1]; + uint32_t p10 = col1[y0]; + uint32_t p11 = col1[y1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) + { + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; + 
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } +}; + +///////////////////////////////////////////////////////////////////////////// +// SSE/AVX shading macros: + +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ uint32_t half = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y = (texturefracy[i] - half) >> ybits; \ + uint32_t y0 = (texturefracy[i] - half) >> ybits; \ + if (y0 > ymax) y0 = 0; \ + uint32_t y1 = y0 + 1; \ + if (y1 > ymax) y1 = 0; \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ @@ -537,8 +573,8 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ - __m128i p1 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -758,12 +794,16 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { uint32_t alpha = fg >> 24; alpha += alpha >> 7; - return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; + uint32_t inv_alpha = 256 - alpha; + return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -772,8 +812,10 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ - bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ - bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ 
\ + bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \ + bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \ + bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \ + bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \ fg_alpha_hi = msrc_alpha; \ fg_alpha_lo = msrc_alpha; \ } diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index af761c6e7..408a2f5a2 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -70,7 +70,7 @@ public: xstep = _xstep; ystep = _ystep; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; if (_magnifying) @@ -166,7 +166,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -258,7 +258,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -305,7 +305,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -349,7 +349,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -364,7 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int vlinebits; + int _vlinebits; + uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -379,7 +380,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; + _vlinebits = vlinebits; + _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -398,13 +400,13 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; + int bits = _vlinebits; int pitch = _pitch * thread->num_cores; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -480,7 +482,7 @@ public: do { __m128i fg; - 
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -498,7 +500,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -520,7 +522,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int mvlinebits; + int _mvlinebits; + uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -535,7 +538,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; + _mvlinebits = mvlinebits; + _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -555,12 +559,12 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; + int bits = _mvlinebits; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -640,7 +644,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, 
bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -660,7 +664,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -686,7 +690,8 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -701,7 +706,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -719,13 +725,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -825,7 +831,8 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; 
fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -840,7 +847,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -858,13 +866,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -963,7 +971,8 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -978,7 +987,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -996,13 +1006,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = 
calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -1101,7 +1111,8 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -1116,7 +1127,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -1134,13 +1146,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e239674e8..c39fdc287 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -185,7 +185,7 @@ public: if 
(count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -196,7 +196,7 @@ public: BYTE *colormap = _colormap; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light, shade_constants); + *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } @@ -204,8 +204,8 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -249,7 +249,7 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -260,10 +260,10 @@ public: BYTE *colormap = _colormap; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; 
} @@ -271,14 +271,14 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -453,7 +453,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -461,7 +461,7 @@ public: uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[*source], 
light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -528,7 +528,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -538,7 +538,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -606,7 +606,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -674,7 +674,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -747,14 +747,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -818,7 +818,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -827,7 +827,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -894,14 +894,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -965,7 +965,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -974,7 +974,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1042,14 +1042,14 @@ 
public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1113,7 +1113,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -1122,7 +1122,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 684be2b6a..64a77e288 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -48,7 +48,7 @@ public: return; ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -207,7 +207,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; BYTE *colormap = _colormap; @@ -335,7 
+335,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -411,7 +411,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -538,7 +538,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; @@ -664,7 +664,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 6913db918..0ede451e0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -514,8 +514,8 @@ void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = LightBgra::calc_light_multiplier(ds_light); + uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 84c967d1d..95dd287aa 
100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1276,7 +1276,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1286,7 +1286,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); + setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1439,9 +1439,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits); + setupvline(bits, fracmax); line1 = dovline1; line4 = dovline4; }); @@ -1455,9 +1455,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits); + setupmvline(bits, fracmax); line1 = domvline1; line4 = domvline4; }); @@ -1475,9 +1475,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits); + setuptmvline(bits, fracmax); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/r_things.cpp b/src/r_things.cpp index e1f1017f3..74707ff72 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2732,7 +2732,7 @@ void R_DrawParticle_rgba(vissprite_t *vis) DrawerCommandQueue::WaitForWorkers(); - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fd12a1587..0fb433343 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1026,7 +1026,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; From b7f32d1bfce120c6372d3ca453aaaba755f8207f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 18:05:32 +0200 Subject: [PATCH 75/94] Added LoopIterator to the drawt family of drawers --- src/r_drawt_rgba.cpp | 1193 ++++++++++++------------------------------ 1 file changed, 330 insertions(+), 863 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index c39fdc287..82932b1f2 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -84,8 +84,9 @@ extern unsigned int *horizspan[4]; ///////////////////////////////////////////////////////////////////////////// -class RtCopy1colRGBACommand : public DrawerCommand +class DrawerRt1colCommand : public DrawerCommand { +public: int hx; int sx; int yl; @@ -93,8 +94,14 @@ class RtCopy1colRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; int _pitch; -public: - RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + uint32_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerRt1colCommand(int hx, int sx, int yl, int yh) { this->hx = hx; this->sx = sx; @@ -103,185 +110,384 @@ public: _destorg = dc_destorg; _pitch = dc_pitch; + + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch, sincr; - count = thread->count_for_thread(yl, (yh - yl + 1)); - if (count <= 0) - return; + LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1)); + if (count <= 0) + return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores * 4; + } - if (count & 1) { - 
*dest = GPalette.BaseColors[*source]; - source += sincr; + explicit operator bool() + { + return count > 0; + } + + bool next() + { dest += pitch; + source += sincr; + return (--count) != 0; } - if (count & 2) { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - source += sincr * 2; - dest += pitch * 2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; - dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; - source += sincr * 4; - dest += pitch * 4; - } while (--count); - } + }; }; -class RtMap1colRGBACommand : public DrawerCommand +class DrawerRt4colsCommand : public DrawerCommand { - int hx; +public: int sx; int yl; int yh; - fixed_t _light; + uint32_t _light; ShadeConstants _shade_constants; BYTE * RESTRICT _destorg; int _pitch; BYTE * RESTRICT _colormap; + uint32_t _srcalpha; + uint32_t _destalpha; -public: - RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + DrawerRt4colsCommand(int sx, int yl, int yh) { - this->hx = hx; this->sx = sx; this->yl = yl; this->yh = yh; - _light = dc_light; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; _destorg = dc_destorg; _pitch = dc_pitch; _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch; int sincr; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; + LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, command->yh - command->yl + 1); + if (count <= 0) + return; - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest 
= thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (count & 1) { - *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - source += sincr; - dest += pitch; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores * 4; } - if (!(count >>= 1)) - return; - do { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + source += sincr; + return (--count) != 0; + } + }; }; -class RtMap4colsRGBACommand : public DrawerCommand +class RtCopy1colRGBACommand : public DrawerRt1colCommand { - int sx; - int yl; - int yh; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _destorg; - int _pitch; - BYTE * RESTRICT _colormap; - public: - RtMap4colsRGBACommand(int sx, int yl, int yh) + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _light = dc_light; - _shade_constants = dc_shade_constants; - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = GPalette.BaseColors[*loop.source]; + 
*loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; +class RtMap1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; +class RtMap4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } - if (count & 1) { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); + } +}; - do { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = 
LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); +class RtAdd1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAdd4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtShaded1colRGBACommand : public DrawerRt1colCommand +{ + uint32_t _color; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void 
Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = _colormap[*loop.source] * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class RtShaded4colsRGBACommand : public DrawerRt4colsCommand +{ + uint32_t _color; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = _colormap[loop.source[i]] * 4; + uint32_t inv_alpha = 256 - alpha; + loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha); + } + } while (loop.next()); + } +}; + +class RtAddClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + 
RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); } }; @@ -405,745 +611,6 @@ public: } }; -class 
RtAdd1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; - -public: - RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += sincr; - dest += pitch; - } while (--count); - } -}; 
- -class RtAdd4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAdd4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _colormap = dc_colormap; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while 
(--count); - } -}; - -class RtShaded1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - BYTE * RESTRICT _destorg; - int _pitch; - int _color; - fixed_t _light; - -public: - RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _destorg = dc_destorg; - _pitch = dc_pitch; - _color = dc_color; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtShaded4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - int _color; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - -public: - RtShaded4colsRGBACommand(int sx, int yl, int yh) - { - 
this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _color = dc_color; - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } 
- - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtAddClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int 
pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = 
thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 
1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - 
count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = 
thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - class RtInitColsRGBACommand : public DrawerCommand { BYTE * RESTRICT buff; From 8ec420a597ee40f52aa0de394d782784a02c6cb9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 19:05:04 +0200 Subject: [PATCH 76/94] Added support for more texture filtering control --- src/r_draw.h | 6 +++++- src/r_draw_rgba.cpp | 15 ++++++++------- src/r_draw_rgba.h | 9 +++------ src/r_draw_rgba_sse.h | 6 +++--- src/r_segs.cpp | 33 ++++++++++++++++++++++----------- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index bd477efc4..6a078b08f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -379,6 +379,10 @@ 
void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; -EXTERN_CVAR(Bool, r_bilinear); + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter_linear); +EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index dc97fdd47..5a6e88e3b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,9 +60,10 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; -CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, true, 0) -CVAR(Bool, r_mipmap, true, 0) +CVAR(Bool, r_multithreaded, true, 0); +CVAR(Bool, r_magfilter_linear, false, 0); +CVAR(Bool, r_minfilter_linear, false, 0); +CVAR(Bool, r_mipmap, true, 0); #ifndef NO_SSE @@ -904,7 +905,7 @@ public: const uint32_t * RESTRICT _source; uint32_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; uint32_t _srcalpha; uint32_t _destalpha; @@ -925,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -995,7 +996,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { @@ -1040,7 +1041,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 20fff4fc0..56f1faa24 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -461,15 +461,11 @@ class SampleBgra public: inline static bool span_sampler_setup(const 
uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { - if (!r_bilinear) - return false; - // Is this a magfilter or minfilter? fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); - if (magnitude >> FRACBITS == 0) - return false; + bool magnifying = (magnitude >> FRACBITS == 0); if (r_mipmap) { @@ -485,7 +481,8 @@ public: level >>= 1; } } - return true; + + return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); } FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 408a2f5a2..bca30185c 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,7 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; public: VecCommand(DrawSpanRGBA)() @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -73,7 +73,7 @@ public: uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_magnifying) + if (_nearest_filter) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 95dd287aa..5c9037375 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -50,6 +50,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" @@ -58,8 +59,6 @@ 
CVAR(Bool, r_np2, true, 0) -EXTERN_CVAR(Bool, r_bilinear) - //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1104,8 +1103,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + if (!r_swtruecolor || getcol != R_GetColumn) { source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; @@ -1138,13 +1136,26 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } From 77054639666f967c7f885e63205ed7978203d3f0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 10:33:35 +0200 Subject: [PATCH 77/94] Improved linear filtering of walls Fixed some crash bugs Added mipmap and filtering options to the display menu --- src/r_draw.cpp | 16 +- src/r_draw.h | 13 +- src/r_draw_rgba.cpp | 120 ++++----------- src/r_draw_rgba.h | 35 ++--- 
src/r_draw_rgba_sse.h | 194 +++++++++++++----------- src/r_segs.cpp | 292 +++++++++++++++++++++++-------------- src/textures/textures.h | 4 + wadsrc/static/language.enu | 3 + wadsrc/static/menudef.txt | 3 + 9 files changed, 364 insertions(+), 316 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 578ca9646..682ed4668 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ int dc_yl; int dc_yh; fixed_t dc_iscale; fixed_t dc_texturefrac; +uint32_t dc_textureheight; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; uint32_t dc_srccolor_bgra; @@ -177,6 +178,7 @@ fixed_t palookuplight[4]; const BYTE* bufplce[4]; const BYTE* bufplce2[4]; uint32_t buftexturefracx[4]; +uint32_t bufheight[4]; // just for profiling int dccount; @@ -1044,6 +1046,7 @@ int ds_ybits; // start of a floor/ceiling tile image const BYTE* ds_source; +bool ds_source_mipmapped; // just for profiling int dscount; @@ -1067,6 +1070,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { ds_source = r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { @@ -1644,8 +1648,6 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; -uint32_t vlinemax; -uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ -1695,12 +1697,11 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits, int fracmax) +void setupvline (int fracbits) { if (r_swtruecolor) { vlinebits = fracbits; - vlinemax = fracmax; return; } @@ -1780,7 +1781,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits, int fracmax) +void setupmvline (int fracbits) { if (!r_swtruecolor) { @@ -1795,7 +1796,6 @@ void setupmvline (int fracbits, int fracmax) else { mvlinebits = fracbits; - mvlinemax = fracmax; } } @@ -1968,12 +1968,10 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; -uint32_t tmvlinemax; -void setuptmvline (int bits, int fracmax) +void setuptmvline (int bits) { tmvlinebits = bits; - tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 6a078b08f..591ae0b5f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -61,6 +61,7 @@ extern "C" int dc_yh; extern "C" fixed_t dc_iscale; extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; +extern "C" uint32_t dc_textureheight; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" uint32_t dc_srccolor_bgra; @@ -84,6 +85,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; extern "C" const BYTE* bufplce2[4]; extern "C" uint32_t buftexturefracx[4]; +extern "C" uint32_t bufheight[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -100,13 +102,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); 
extern void (*dovline4) (); -extern void setupvline (int,int); +extern void setupvline (int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int,int); +extern void setupmvline (int); -extern void setuptmvline (int,int); +extern void setuptmvline (int); // The Spectre/Invisibility effect. extern void (*R_DrawFuzzColumn)(void); @@ -316,6 +318,7 @@ extern "C" fixed_t ds_alpha; // start of a 64*64 tile image extern "C" const BYTE* ds_source; +extern "C" bool ds_source_mipmapped; extern "C" int ds_color; // [RH] For flat color (no texturing) @@ -381,8 +384,8 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter_linear); -EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 5a6e88e3b..2576cfeda 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,22 +48,22 @@ #endif #include -extern int vlinebits; -extern int mvlinebits; -extern int tmvlinebits; -extern uint32_t vlinemax; -extern uint32_t mvlinemax; -extern uint32_t tmvlinemax; - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; extern int wallshade; +// Use multiple threads when drawing CVAR(Bool, r_multithreaded, true, 0); -CVAR(Bool, r_magfilter_linear, false, 0); -CVAR(Bool, r_minfilter_linear, false, 0); -CVAR(Bool, r_mipmap, true, 0); + +// Use linear filtering when scaling up +CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use linear filtering when scaling down +CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use mipmapped textures +CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE @@ -926,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); 
_shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -1354,8 +1354,7 @@ public: DWORD _texturefrac; uint32_t _texturefracx; DWORD _iscale; - int _vlinebits; - uint32_t _vlinemax; + uint32_t _textureheight; const uint32 * RESTRICT _source; const uint32 * RESTRICT _source2; @@ -1365,7 +1364,7 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall1Command(int vlinebits, uint32_t vlinemax) + DrawerWall1Command() { _dest = dc_dest; _pitch = dc_pitch; @@ -1373,8 +1372,7 @@ public: _texturefrac = dc_texturefrac; _texturefracx = dc_texturefracx; _iscale = dc_iscale; - _vlinebits = vlinebits; - _vlinemax = vlinemax; + _textureheight = dc_textureheight; _source = (const uint32 *)dc_source; _source2 = (const uint32 *)dc_source2; @@ -1394,7 +1392,8 @@ public: uint32_t fracstep; uint32_t frac; uint32_t texturefracx; - int bits; + uint32_t height; + uint32_t half; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1406,8 +1405,10 @@ public: frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); texturefracx = command->_texturefracx; dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - bits = command->_vlinebits; pitch = command->_pitch * thread->num_cores; + + height = command->_textureheight; + half = (0x80000000 + height - 1) / height; } explicit operator bool() @@ -1417,7 +1418,7 @@ public: int sample_index() { - return frac >> bits; + return ((frac >> FRACBITS) * height) >> FRACBITS; } bool next() @@ -1435,12 +1436,11 @@ public: BYTE * RESTRICT _dest; int _count; int _pitch; - int _vlinebits; - uint32_t _vlinemax; ShadeConstants _shade_constants; uint32_t _vplce[4]; uint32_t 
_vince[4]; uint32_t _buftexturefracx[4]; + uint32_t _bufheight[4]; const uint32_t * RESTRICT _bufplce[4]; const uint32_t * RESTRICT _bufplce2[4]; uint32_t _light[4]; @@ -1448,19 +1448,18 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall4Command(int vlinebits, uint32_t vlinemax) + DrawerWall4Command() { _dest = dc_dest; _count = dc_count; _pitch = dc_pitch; - _vlinebits = vlinebits; - _vlinemax = vlinemax; _shade_constants = dc_shade_constants; for (int i = 0; i < 4; i++) { _vplce[i] = vplce[i]; _vince[i] = vince[i]; _buftexturefracx[i] = buftexturefracx[i]; + _bufheight[i] = bufheight[i]; _bufplce[i] = (const uint32_t *)bufplce[i]; _bufplce2[i] = (const uint32_t *)bufplce2[i]; _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); @@ -1475,9 +1474,10 @@ public: uint32_t *dest; int pitch; int count; - int bits; uint32_t vplce[4]; uint32_t vince[4]; + uint32_t height[4]; + uint32_t half[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1487,13 +1487,14 @@ public: dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); pitch = command->_pitch * thread->num_cores; - bits = command->_vlinebits; int skipped = thread->skipped_by_thread(command->_dest_y); for (int i = 0; i < 4; i++) { vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; + height[i] = command->_bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; } } @@ -1504,7 +1505,7 @@ public: int sample_index(int col) { - return vplce[col] >> bits; + return ((vplce[col] >> FRACBITS) * height[col]) >> FRACBITS; } bool next() @@ -1522,10 +1523,6 @@ public: class Vlinec1RGBACommand : public DrawerWall1Command { public: - Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1543,7 +1540,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -1553,10 +1550,6 @@ public: class Vlinec4RGBACommand : public DrawerWall4Command { public: - Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1579,7 +1572,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::copy(fg); } } while (loop.next()); @@ -1590,10 +1583,6 @@ public: class Mvlinec1RGBACommand : public DrawerWall1Command { public: - Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1611,7 +1600,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } @@ -1621,10 +1610,6 @@ public: class Mvlinec4RGBACommand : public DrawerWall4Command { public: - Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) - { - 
} - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1647,7 +1632,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); } } while (loop.next()); @@ -1658,10 +1643,6 @@ public: class Tmvline1AddRGBACommand : public DrawerWall1Command { public: - Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1677,10 +1658,6 @@ public: class Tmvline4AddRGBACommand : public DrawerWall4Command { public: - Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1699,10 +1676,6 @@ public: class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: - Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1718,10 +1691,6 @@ public: class Tmvline4AddClampRGBACommand : public DrawerWall4Command { public: - Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1740,10 +1709,6 @@ public: class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1759,10 +1724,6 @@ public: class 
Tmvline4SubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1781,10 +1742,6 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1800,10 +1757,6 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -2362,17 +2315,8 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); } -//extern FTexture *rw_pic; // For the asserts below - DWORD vlinec1_rgba() { - /*DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - DWORD height = rw_pic->GetHeight(); - assert((frac >> vlinebits) < height); - frac += (dc_count-1) * fracstep; - assert((frac >> vlinebits) <= height);*/ - DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 56f1faa24..a60fd65c7 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -459,7 +459,7 @@ public: class SampleBgra { public: - inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) { // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); @@ -467,7 +467,7 @@ public: fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); bool magnifying = (magnitude >> FRACBITS == 0); - if (r_mipmap) + if (r_mipmap && mipmapped) { int level = magnitude >> (FRACBITS + 1); while (level != 0) @@ -482,18 +482,15 @@ public: } } - return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); + return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) { - uint32_t half = 1 << (ybits - 1); - uint32_t y0 = (texturefracy - half) >> ybits; - if (y0 > ymax) - y0 = 0; - uint32_t y1 = y0 + 1; - if (y1 > ymax) - y1 = 0; + uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t y0 = frac_y0 >> FRACBITS; + uint32_t y1 = frac_y1 >> FRACBITS; uint32_t p00 = col0[y0]; uint32_t p01 = col0[y1]; @@ -501,7 +498,7 @@ public: uint32_t p11 = col1[y1]; uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -546,20 +543,18 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ - uint32_t half = 1 << (ybits - 1); \ - \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y0 
= (texturefracy[i] - half) >> ybits; \ - if (y0 > ymax) y0 = 0; \ - uint32_t y1 = y0 + 1; \ - if (y1 > ymax) y1 = 0; \ + uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t y0 = frac_y0 >> FRACBITS; \ + uint32_t y1 = frac_y1 >> FRACBITS; \ \ uint32_t inv_b = texturefracx[i]; \ - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ uint32_t a = 16 - inv_a; \ uint32_t b = 16 - inv_b; \ \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index bca30185c..ae8d3bf42 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); } void Execute(DrawerThread *thread) override @@ -364,14 +364,13 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _vlinebits; - uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; const uint32_t * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Vlinec4RGBA)() @@ -380,8 +379,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _vlinebits = vlinebits; - _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -390,6 +387,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -400,9 +398,16 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, 
(uint32_t*)_dest); - int bits = _vlinebits; int pitch = _pitch * thread->num_cores; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); @@ -431,10 +436,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -457,10 +462,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -482,7 +487,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, 
half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -500,7 +505,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -522,7 +527,6 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _mvlinebits; uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -530,6 +534,7 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand const uint32 * RESTRICT bufplce[4]; const uint32 * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Mvlinec4RGBA)() @@ -538,8 +543,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _mvlinebits = mvlinebits; - _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -548,6 +551,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -559,7 +563,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _mvlinebits; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); @@ -589,10 +599,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - 
uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -617,10 +627,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -644,7 +654,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -664,7 +674,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -690,12 +700,11 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; 
- int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddRGBA)() @@ -706,14 +715,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -725,7 +733,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -754,10 +769,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -791,10 +806,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) 
* height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -831,12 +846,11 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddClampRGBA)() @@ -847,14 +861,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -866,7 +879,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -895,10 +915,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = 
bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -931,10 +951,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -971,12 +991,11 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4SubClampRGBA)() @@ -987,14 +1006,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1006,7 +1024,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + 
height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1035,10 +1060,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1071,10 +1096,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1111,12 +1136,11 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4RevSubClampRGBA)() @@ -1127,14 +1151,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; 
- _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1146,7 +1169,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1175,10 +1205,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1211,10 +1241,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0]
+ local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5c9037375..630d64da0 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1075,86 +1075,112 @@ struct WallscanSampler uint32_t uv_pos; uint32_t uv_step; - int32_t uv_fracbits; uint32_t uv_max; const BYTE *source; const BYTE *source2; uint32_t texturefracx; + uint32_t height; }; WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { - int base_width = texture->GetWidth(); - int base_height = texture->GetHeight(); - uv_fracbits = 32 - texture->HeightBits; - uv_max = base_height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; - v = v - floor(v); - v *= base_height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - - bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - - if (!r_swtruecolor || getcol != R_GetColumn) + if (!r_swtruecolor) { + height = texture->GetHeight(); + int uv_fracbits = 32 - texture->HeightBits; + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; texturefracx = 0; } else { - int mipmap_offset = 0; - int mip_width = base_width; - int mip_height = base_height; - if (r_mipmap) - { - fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); - int level = magnitude >> FRACBITS; - while (level != 0) - { - if (uv_fracbits > 30) - break; + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); - mipmap_offset += mip_width * mip_height; - uv_fracbits += 1; - uv_pos >>= 1; - uv_step >>= 1; - xoffset >>= 1; - level >>= 1; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
+ { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; } - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; - bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); - if (filter_nearest) + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; + height = texture->GetHeight(); texturefracx = 0; } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + double magnitude = abs(uv_stepd * 2); + bool magnifying = magnitude < 1.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped()) + { + int level = (int)MAX(magnitude - 1.0, 0.0); + while (level != 0) + { + mipmap_offset += mip_width * mip_height; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = 
(BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } } @@ -1162,18 +1188,18 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof // Draw a column with support for non-power-of-two ranges void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler.uv_max == 0) // power of two + if (r_swtruecolor) { int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; draw1column(); uint64_t step64 = sampler.uv_step; @@ -1182,41 +1208,60 @@ void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*d } else { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) + if (sampler.uv_max == 0) // power of two { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); + int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; + dc_texturefrac = sampler.uv_pos; draw1column(); - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } + else + { + uint32_t 
uv_pos = sampler.uv_pos; - sampler.uv_pos = uv_pos; + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } } } // Draw four columns with support for non-power-of-two ranges void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler[0].uv_max == 0) // power of two, no wrap handling needed + if (r_swtruecolor) { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1224,6 +1269,7 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; @@ -1231,52 +1277,74 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr uint64_t pos64 = sampler[i].uv_pos; sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; draw4columns(); } else { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - } - - 
uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps + int count = y2 - y1; for (int i = 0; i < 4; i++) { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; draw4columns(); - - // Wrap the uv position + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; for (int i = 0; i < 4; i++) { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; } - left -= count; + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left 
-= count; + } } } } @@ -1287,7 +1355,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1297,7 +1365,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); + setupwallscan(r_swtruecolor ? FRACBITS : 32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1450,9 +1518,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits, fracmax); + setupvline(bits); line1 = dovline1; line4 = dovline4; }); @@ -1466,9 +1534,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits, fracmax); + setupmvline(bits); line1 = domvline1; line4 = domvline4; }); @@ -1486,9 +1554,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits, fracmax); + setuptmvline(bits); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/textures/textures.h b/src/textures/textures.h index ab9dc3719..bb83f79e7 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -185,6 +185,9 @@ public: // Returns the whole texture, stored in column-major order, in BGRA8 format virtual const uint32_t *GetPixelsBgra(); + // Returns true if GetPixelsBgra includes mipmaps + virtual bool Mipmapped() { return true; } + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -530,6 +533,7 @@ public: void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } + bool Mipmapped() override { return false; } void MakeTexture (); void MakeTextureBgra (); diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index f9050a27b..0bb3a84fd 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1781,6 +1781,9 @@ DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; DSPLYMNU_TRUECOLOR = "True color output"; +DSPLYMNU_MINFILTER = "Linear filter when downscaling"; +DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; +DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not 
used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 3c712de96..679db909b 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -662,6 +662,9 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" + Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" + Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 3b6d177787842f0d1844673a4b220797215fa1d8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 12:14:15 +0200 Subject: [PATCH 78/94] Added bicubic interpolation when generating mipmaps --- src/CMakeLists.txt | 1 + src/textures/bicubic_interpolation.cpp | 107 +++++++++++++++++++++++++ src/textures/bicubic_interpolation.h | 50 ++++++++++++ src/textures/texture.cpp | 19 +++++ src/textures/textures.h | 1 + 5 files changed, 178 insertions(+) create mode 100644 src/textures/bicubic_interpolation.cpp create mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea0..042da0c8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,6 +1190,7 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp + textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp new file mode 100644 index 000000000..2c8a3049d --- /dev/null +++ b/src/textures/bicubic_interpolation.cpp @@ -0,0 +1,107 @@ + +#include "doomtype.h" +#include "bicubic_interpolation.h" + +void BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) +{ + if 
(dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) + return; + + // Scale factor as a rational number r = n / d + int n = dest_width; + int d = src_width; + + const unsigned char *src_ptr = (const unsigned char *)src_data; + unsigned char *dest_ptr = (unsigned char *)dest_data; + + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); +} + +void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) +{ + // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation + + int dimension_size = (out_width > out_height) ? out_width : out_height; + L_vector.resize(dimension_size); + + for (int i=0;i<4;i++) + c_vector[i].resize(dimension_size); + h_vector.resize(in_width); + + int larger_out_dimension; + int j, k, l, m, index; + int *L = &L_vector[0]; + float x; + float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; + float *h = &h_vector[0]; + + larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; + + for (k = 0; k < larger_out_dimension; k++) + L[k] = (k * d) / n; + + for (k = 0; k < n; k++) + { + x = (float)((k * d) % n) / (float)n; + c[0][k] = C0(x); + c[1][k] = C1(x); + c[2][k] = C2(x); + c[3][k] = C3(x); + } + for (k = n; k < larger_out_dimension; k++) + for (l = 0; l < 4; l++) + c[l][k] = c[l][k % n]; + + for (k = 0; k < out_height; k++) + { + for (j = 0; j < in_width; j++) + { + h[j] = 0.0f; + for (l = 0; l < 4; l++) + { + index = L[k] + l - 1; + if ((index >= 0) && (index < in_height)) + h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; + } + } + for (m = 0; m < out_width; m++) + { + x = 0.5f; + for (l = 0; l < 4; l++) + { + index = L[m] + l - 1; + if ((index >= 0) && (index < in_width)) + x += h[index] * c[3 - l][m]; + } + if (x <= 0.0f) + g[k*out_pitch+m*4] = 0; + else if (x >= 255) + g[k*out_pitch+m*4] = 255; + else + g[k*out_pitch+m*4] = (unsigned char)x; + } + } +} + +inline float BicubicInterpolation::C0(float t) +{ + return -a * t * t * t + a * t * t; +} + +inline float BicubicInterpolation::C1(float t) +{ + return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; +} + +inline float BicubicInterpolation::C2(float t) +{ + return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; +} + +inline float BicubicInterpolation::C3(float t) +{ + return a * t * t * t - 2.0f * a * t * t + a * t; +} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h new file mode 100644 index 000000000..da547ad83 --- /dev/null +++ b/src/textures/bicubic_interpolation.h @@ -0,0 +1,50 @@ +/* +** Bicubic Image Scaler +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef __BICUBIC_INTERPOLATION_H__ +#define __BICUBIC_INTERPOLATION_H__ + +#pragma once + +#include <vector> + +// Bicubic image scaler +class BicubicInterpolation +{ +public: + void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); + +private: + void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); + + float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 + + inline float C0(float t); + inline float C1(float t); + inline float C2(float t); + inline float C3(float t); + + std::vector<int> L_vector; + std::vector<float> c_vector[4]; + std::vector<float> h_vector; +}; + +#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index f5e4d4aa8..7ff5c9ba2 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -381,6 +382,24 @@ int FTexture::MipmapLevels() const } void FTexture::GenerateBgraMipmaps() +{ + BicubicInterpolation bicubic; + + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width * 
Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + bicubic.ScaleImage(dest, h, w, src, Height, Width); + + dest += w * h; + } +} + +void FTexture::GenerateBgraMipmapsFast() { uint32_t *src = PixelsBgra.data(); uint32_t *dest = src + Width * Height; diff --git a/src/textures/textures.h b/src/textures/textures.h index bb83f79e7..ff1093a49 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -276,6 +276,7 @@ protected: void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); + void GenerateBgraMipmapsFast(); int MipmapLevels() const; public: From 4fd127651d9177a8a32b3a5415654ff741f8c459 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 06:54:32 +0200 Subject: [PATCH 79/94] Fixed fuzz drawer crash --- src/r_draw_rgba.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2576cfeda..a0f534164 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -826,7 +826,10 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -842,7 +845,7 @@ public: return; } - bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); if (lowerbounds) count--; @@ -858,7 +861,10 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) 
* 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -875,7 +881,10 @@ public: // Handle the case where we would go out of bounds at the bottom if (lowerbounds) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; From 928e8e0d4374cff2d2f07957d7ca77d153cec128 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 12:53:10 +0200 Subject: [PATCH 80/94] Improved linear filtering performance by adding a lookup table --- src/r_draw_rgba.cpp | 25 ++++++++++++ src/r_draw_rgba.h | 94 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a0f534164..bfabdfbbb 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -98,12 +98,37 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +__m128i SampleBgra::samplertable[256 * 2]; + DrawerCommandQueue *DrawerCommandQueue::Instance() { static DrawerCommandQueue queue; return &queue; } +DrawerCommandQueue::DrawerCommandQueue() +{ + for (int inv_b = 0; inv_b < 16; inv_b++) + { + for (int inv_a = 0; inv_a < 16; inv_a++) + { + int a = 16 - inv_a; + int b = 16 - inv_b; + + int ab = a * b; + int invab = inv_a * b; + int ainvb = a * inv_b; + int invainvb = inv_a * inv_b; + + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2, ab_invab); + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); + } + } +} + DrawerCommandQueue::~DrawerCommandQueue() { 
StopThreads(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a60fd65c7..4961fa6dc 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -222,6 +222,7 @@ class DrawerCommandQueue static DrawerCommandQueue *Instance(); + DrawerCommandQueue(); ~DrawerCommandQueue(); public: @@ -538,11 +539,75 @@ public: return (alpha << 24) | (red << 16) | (green << 8) | blue; } + +#ifndef NO_SSE + static __m128i samplertable[256 * 2]; +#endif }; ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ + const uint32_t *baseptr = col0[0]; \ + __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ + __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ + __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i m127 = _mm_set1_epi16(127); \ + __m128i m16 = _mm_set1_epi32(16); \ + __m128i m15 = _mm_set1_epi32(15); \ + __m128i mheight = _mm_loadu_si128((const __m128i*)height); \ + __m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx); + +#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ + __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ + __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ + __m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \ + __m128i inv_b = mtexturefracx; \ + __m128i 
inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \ + __m128i a = _mm_sub_epi32(m16, inv_a); \ + __m128i b = _mm_sub_epi32(m16, inv_b); \ + __m128i ab = _mm_mullo_epi16(a, b); \ + __m128i invab = _mm_mullo_epi16(inv_a, b); \ + __m128i ainvb = _mm_mullo_epi16(a, inv_b); \ + __m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \ + __m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \ + ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \ + invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \ + invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \ + ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \ + ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \ + invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \ + invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \ + __m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \ + __m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \ + __m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \ + __m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \ + __m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), ab_lo); \ + __m128i p01_lo = 
_mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \ + __m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \ + __m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \ + __m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \ + __m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \ + __m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \ + __m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \ + __m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ @@ -550,23 +615,18 @@ public: { \ uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ - uint32_t y0 = frac_y0 >> FRACBITS; \ - uint32_t y1 = frac_y1 >> FRACBITS; \ + uint32_t y0 = (frac_y0 >> FRACBITS); \ + uint32_t y1 = (frac_y1 >> FRACBITS); \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \ + __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ + __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -597,15 +657,9 @@ public: \ uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ From 6c037fa24971df781b5581a42cf58651bcb71954 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 21:23:32 +0200 Subject: [PATCH 81/94] Throwing templates at the code redundancy problem in drawers --- src/r_draw_rgba.cpp | 630 ++++++++++++++++++++-------- src/r_draw_rgba.h | 88 ++-- 
src/r_draw_rgba_sse.h | 923 +---------------------------------------- src/r_drawt_rgba.cpp | 5 + src/r_drawt_rgba_sse.h | 10 + src/r_segs.cpp | 3 +- 6 files changed, 551 insertions(+), 1108 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bfabdfbbb..fbb2c12c5 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -67,8 +67,13 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE @@ -1552,8 +1557,446 @@ public: return (--count) != 0; } }; + +#ifdef NO_SSE + struct NearestSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return cmd._bufplce[index][loop.sample_index(index)]; + } + }; + struct LinearSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + } + }; +#else + struct NearestSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + return _mm_set_epi32(cmd._bufplce[3][loop.sample_index(3)], cmd._bufplce[2][loop.sample_index(2)], cmd._bufplce[1][loop.sample_index(1)], cmd._bufplce[0][loop.sample_index(0)]); + } + }; + + struct LinearSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + return fg; + } + }; +#endif + +#ifdef NO_SSE + template + struct 
Copy + { + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } + }; + + template + struct Mask + { + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } + }; + + template + struct TMaskAdd + { + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskSub + { + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskRevSub + { + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + typedef Copy 
CopyNearestSimple; + typedef Copy CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef Mask MaskNearestSimple; + typedef Mask MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAdd TMaskAddNearestSimple; + typedef TMaskAdd TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSub TMaskSubNearestSimple; + typedef TMaskSub TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + typedef TMaskRevSub TMaskRevSubNearestSimple; + typedef TMaskRevSub TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#else + template + struct CopySimple + { + VEC_SHADE_VARS(); + CopySimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct Copy + { + VEC_SHADE_VARS(); + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE(fg, cmd._shade_constants); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct MaskSimple + { + VEC_SHADE_VARS(); + MaskSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + 
}; + + template + struct Mask + { + VEC_SHADE_VARS(); + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE(fg, cmd._shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct TMaskAddSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAddSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskAdd + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, 
LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE(fg, cmd._shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSub + { + VEC_SHADE_VARS(); + 
VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE(fg, cmd._shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 
8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSub + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + typedef CopySimple CopyNearestSimple; + typedef CopySimple CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef MaskSimple MaskNearestSimple; + typedef MaskSimple MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAddSimple TMaskAddNearestSimple; + typedef TMaskAddSimple TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSubSimple TMaskSubNearestSimple; + typedef TMaskSubSimple TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + 
typedef TMaskRevSubSimple TMaskRevSubNearestSimple; + typedef TMaskRevSubSimple TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#endif }; +typedef DrawerBlendCommand Vlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4NearestRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearRGBACommand; + class Vlinec1RGBACommand : public DrawerWall1Command { public: @@ -1581,39 +2024,6 @@ public: } }; -class Vlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if 
(_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - } -}; - class Mvlinec1RGBACommand : public DrawerWall1Command { public: @@ -1641,39 +2051,6 @@ public: } }; -class Mvlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - } -}; - class Tmvline1AddRGBACommand : public DrawerWall1Command { public: @@ -1689,24 +2066,6 @@ public: } }; -class Tmvline4AddRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, 
_destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: @@ -1722,24 +2081,6 @@ public: } }; -class Tmvline4AddClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: @@ -1755,24 +2096,6 @@ public: } }; -class Tmvline4SubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: @@ -1788,24 +2111,6 @@ public: } }; -class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - ///////////////////////////////////////////////////////////////////////////// class DrawFogBoundaryLineRGBACommand : public DrawerCommand @@ -2355,13 +2660,22 @@ DWORD vlinec1_rgba() return dc_texturefrac + dc_count * 
dc_iscale; } +template +void queue_wallcommand() +{ + if (bufplce2[0] == nullptr && dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else if (bufplce2[0] == nullptr) + DrawerCommandQueue::QueueCommand(); + else if (dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +} + void vlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2374,11 +2688,7 @@ DWORD mvlinec1_rgba() void mvlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2391,11 +2701,7 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2408,11 +2714,7 @@ fixed_t tmvline1_addclamp_rgba() void tmvline4_addclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2425,11 +2727,7 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2442,11 +2740,7 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h 
b/src/r_draw_rgba.h index 4961fa6dc..53572c88b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,6 +286,22 @@ public: void Execute(DrawerThread *thread) override; }; +template +class DrawerBlendCommand : public CommandType +{ +public: + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + BlendMode blend(*this, loop); + do + { + blend.Blend(*this, loop); + } while (loop.next()); + } +}; + ///////////////////////////////////////////////////////////////////////////// // Pixel shading inline functions: @@ -624,7 +640,7 @@ public: __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col1[i][y0]); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \ __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ @@ -635,6 +651,26 @@ public: } \ } +#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \ + uint32_t y0[4], y1[4]; \ + for (int i = 0; i < 4; i++) \ + { \ + y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \ + y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \ + } \ + __m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \ + __m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \ + __m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \ + __m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), mipfrac); \ + __m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \ + __m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \ + __m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \ + __m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ int xshift = (32 - xbits); \ int yshift = (32 - ybits); \ @@ -844,12 +880,14 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } +#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128; + #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ - __m128i m256 = _mm_set1_epi16(256); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i m128 = _mm_set1_epi16(128); + msrc_alpha = _mm_set1_epi16(src_alpha); \ + mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + m256 = _mm_set1_epi16(256); \ + m255 = _mm_set1_epi16(255); \ + m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -866,15 +904,17 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) fg_alpha_lo = msrc_alpha; \ } +#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate; + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; // Calculate constants for a simple shade with different light levels for each pixel #define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i 
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); // Simple shade 4 pixels #define SSE_SHADE_SIMPLE(fg) { \ @@ -889,31 +929,31 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) // Calculate constants for a complex shade #define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = fade_amount_hi; \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Calculate constants for a complex shade with different light levels for each pixel #define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ 
- __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Complex shade 4 pixels #define SSE_SHADE(fg, shade_constants) { \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index ae8d3bf42..4ee557693 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -84,6 +84,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -121,6 +122,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) @@ -184,6 +186,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -217,6 +220,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while 
(sse_count--) @@ -277,6 +281,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -289,6 +294,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -317,6 +323,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -331,6 +338,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -357,918 +365,3 @@ public: } } }; - -class VecCommand(Vlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Vlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); 
- uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + 
local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Mvlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - uint32_t _mvlinemax; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Mvlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; 
- bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); - uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg 
= _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, 
local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Tmvline4AddRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 
8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - 
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; 
i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - 
local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = 
_mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4SubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> 
(FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], 
shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4RevSubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = 
dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[4]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = 
local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 82932b1f2..45bd5c029 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -53,8 +53,13 @@ extern unsigned int *horizspan[4]; #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 64a77e288..7a02f2282 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -60,6 +60,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); if (count & 1) { @@ -110,6 +111,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); if (count & 1) { @@ -218,6 +220,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -254,6 +257,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -421,6 +425,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i 
mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -457,6 +462,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -547,6 +553,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -583,6 +590,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -673,6 +681,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -709,6 +718,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 630d64da0..870d74894 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1146,15 +1146,16 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof int mip_height = texture->GetHeight(); if (r_mipmap && texture->Mipmapped()) { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); int level = (int)MAX(magnitude - 1.0, 0.0); while (level != 0) { mipmap_offset += mip_width * mip_height; - xoffset >>= 1; level >>= 1; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } + xoffset = (xpos >> FRACBITS) * mip_width; } const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; From 8f38d3af990c5e9373f109781add7448f2de3c9f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 10:49:15 +0200 Subject: [PATCH 
82/94] Replaced the bicubic interpolation filter with a simple sharpening filter --- src/CMakeLists.txt | 1 - src/r_draw_rgba.cpp | 4 + src/textures/bicubic_interpolation.cpp | 107 --------------------- src/textures/bicubic_interpolation.h | 50 ---------- src/textures/texture.cpp | 124 ++++++++++++++++++++++--- 5 files changed, 117 insertions(+), 169 deletions(-) delete mode 100644 src/textures/bicubic_interpolation.cpp delete mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 042da0c8f..8c0a30ea0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,7 +1190,6 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp - textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fbb2c12c5..aa88e4302 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -103,7 +103,9 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +#ifndef NO_SSE __m128i SampleBgra::samplertable[256 * 2]; +#endif DrawerCommandQueue *DrawerCommandQueue::Instance() { @@ -113,6 +115,7 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() DrawerCommandQueue::DrawerCommandQueue() { +#ifndef NO_SSE for (int inv_b = 0; inv_b < 16; inv_b++) { for (int inv_a = 0; inv_a < 16; inv_a++) @@ -132,6 +135,7 @@ DrawerCommandQueue::DrawerCommandQueue() _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); } } +#endif } DrawerCommandQueue::~DrawerCommandQueue() diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp deleted file mode 100644 index 2c8a3049d..000000000 --- a/src/textures/bicubic_interpolation.cpp +++ /dev/null @@ -1,107 +0,0 @@ - -#include "doomtype.h" -#include "bicubic_interpolation.h" - -void 
BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) -{ - if (dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) - return; - - // Scale factor as a rational number r = n / d - int n = dest_width; - int d = src_width; - - const unsigned char *src_ptr = (const unsigned char *)src_data; - unsigned char *dest_ptr = (unsigned char *)dest_data; - - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); -} - -void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) -{ - // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation - - int dimension_size = (out_width > out_height) ? out_width : out_height; - L_vector.resize(dimension_size); - - for (int i=0;i<4;i++) - c_vector[i].resize(dimension_size); - h_vector.resize(in_width); - - int larger_out_dimension; - int j, k, l, m, index; - int *L = &L_vector[0]; - float x; - float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; - float *h = &h_vector[0]; - - larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; - - for (k = 0; k < larger_out_dimension; k++) - L[k] = (k * d) / n; - - for (k = 0; k < n; k++) - { - x = (float)((k * d) % n) / (float)n; - c[0][k] = C0(x); - c[1][k] = C1(x); - c[2][k] = C2(x); - c[3][k] = C3(x); - } - for (k = n; k < larger_out_dimension; k++) - for (l = 0; l < 4; l++) - c[l][k] = c[l][k % n]; - - for (k = 0; k < out_height; k++) - { - for (j = 0; j < in_width; j++) - { - h[j] = 0.0f; - for (l = 0; l < 4; l++) - { - index = L[k] + l - 1; - if ((index >= 0) && (index < in_height)) - h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; - } - } - for (m = 0; m < out_width; m++) - { - x = 0.5f; - for (l = 0; l < 4; l++) - { - index = L[m] + l - 1; - if ((index >= 0) && (index < in_width)) - x += h[index] * c[3 - l][m]; - } - if (x <= 0.0f) - g[k*out_pitch+m*4] = 0; - else if (x >= 255) - g[k*out_pitch+m*4] = 255; - else - g[k*out_pitch+m*4] = (unsigned char)x; - } - } -} - -inline float BicubicInterpolation::C0(float t) -{ - return -a * t * t * t + a * t * t; -} - -inline float BicubicInterpolation::C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -} - -inline float BicubicInterpolation::C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -} - -inline float BicubicInterpolation::C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; -} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h deleted file mode 100644 index da547ad83..000000000 --- a/src/textures/bicubic_interpolation.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -** Bicubic Image Scaler -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BICUBIC_INTERPOLATION_H__ -#define __BICUBIC_INTERPOLATION_H__ - -#pragma once - -#include - -// Bicubic image scaler -class BicubicInterpolation -{ -public: - void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); - -private: - void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); - - float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 - - inline float C0(float t); - inline float C1(float t); - inline float C2(float t); - inline float C3(float t); - - std::vector L_vector; - std::vector c_vector[4]; - std::vector h_vector; -}; - -#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7ff5c9ba2..ce7874ee6 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,7 +45,6 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" -#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -383,19 +382,122 @@ int FTexture::MipmapLevels() const void FTexture::GenerateBgraMipmaps() { - BicubicInterpolation bicubic; - - uint32_t *src = PixelsBgra.data(); - uint32_t *dest = src + Width * 
Height; - int levels = MipmapLevels(); - for (int i = 1; i < levels; i++) + struct Color4f { - int w = MAX(Width >> i, 1); - int h = MAX(Height >> i, 1); + float a, r, g, b; + Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; } + Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; } + Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; } + Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; } + Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; } + Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; } + Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; } + Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; } + }; - bicubic.ScaleImage(dest, h, w, src, Height, Width); + int levels = MipmapLevels(); + std::vector image(PixelsBgra.size()); - dest += w * h; + // Convert to normalized linear colorspace + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t c8 = PixelsBgra[x * Height + y]; + Color4f c; + c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + image[x * Height + y] = c; + } + } + } + + // Generate mipmaps + { + std::vector smoothed(Width * Height); + Color4f *src = image.data(); + Color4f *dest = src + Width * Height; + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + // Downscale + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = 
MIN((y + 1) * 2, srch - 1); + + Color4f src00 = src[sy0 + sx0 * srch]; + Color4f src01 = src[sy1 + sx0 * srch]; + Color4f src10 = src[sy0 + sx1 * srch]; + Color4f src11 = src[sy1 + sx1 * srch]; + Color4f c = (src00 + src01 + src10 + src11) * 0.25f; + + dest[y + x * h] = src00; + } + } + + // Sharpen filter with a 3x3 kernel: + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f }; + for (int kx = -1; kx < 2; kx++) + { + for (int ky = -1; ky < 2; ky++) + { + int a = y + ky; + int b = x + kx; + if (a < 0) a = h - 1; + if (a == h) a = 0; + if (b < 0) b = w - 1; + if (b == h) b = 0; + c = c + dest[a + b * h]; + } + } + c = c * (1.0f / 9.0f); + smoothed[y + x * h] = c; + } + } + float k = 0.04f; + for (int j = 0; j < w * h; j++) + dest[j] = dest[j] + (dest[j] - smoothed[j]) * k; + + src = dest; + dest += w * h; + } + } + + // Convert to bgra8 sRGB colorspace + { + Color4f *src = image.data() + Width * Height; + uint32_t *dest = PixelsBgra.data() + Width * Height; + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + for (int j = 0; j < w * h; j++) + { + uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + dest[j] = (a << 24) | (r << 16) | (g << 8) | b; + } + src += w * h; + dest += w * h; + } } } From 200d357b0d1f609ce67fdb23c03c77836285f0e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:43:24 +0200 Subject: [PATCH 83/94] Linear filtering bug fix --- src/r_draw_rgba.cpp | 16 ++++++++-------- src/r_draw_rgba.h | 40 ++++++++++++++++++---------------------- src/r_segs.cpp | 4 ++-- 3 files changed, 28 insertions(+), 32 
deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index aa88e4302..7a071e1d4 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1436,7 +1436,7 @@ public: uint32_t frac; uint32_t texturefracx; uint32_t height; - uint32_t half; + uint32_t one; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1451,7 +1451,7 @@ public: pitch = command->_pitch * thread->num_cores; height = command->_textureheight; - half = (0x80000000 + height - 1) / height; + one = ((0x80000000 + height - 1) / height) * 2 + 1; } explicit operator bool() @@ -1520,7 +1520,7 @@ public: uint32_t vplce[4]; uint32_t vince[4]; uint32_t height[4]; - uint32_t half[4]; + uint32_t one[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1537,7 +1537,7 @@ public: vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; height[i] = command->_bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; + one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1; } } @@ -1574,7 +1574,7 @@ public: { FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) { - return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]); } }; #else @@ -1591,7 +1591,7 @@ public: FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height); return fg; } }; @@ -2021,7 +2021,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -2048,7 +2048,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 53572c88b..27d7bd035 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -502,10 +502,10 @@ public: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height) { - uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; - uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t frac_y0 = (texturefracy >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height; uint32_t y0 = frac_y0 >> FRACBITS; uint32_t y1 = frac_y1 >> FRACBITS; @@ -533,18 +533,16 @@ public: int yshift = (32 - ybits); int xmask = (1 << xshift) - 1; int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; + 
uint32_t x = xfrac >> xbits; + uint32_t y = yfrac >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -564,11 +562,11 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \ const uint32_t *baseptr = col0[0]; \ __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ - __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i mone = _mm_loadu_si128((const __m128i*)one); \ __m128i m127 = _mm_set1_epi16(127); \ __m128i m16 = _mm_set1_epi32(16); \ __m128i m15 = _mm_set1_epi32(15); \ @@ -577,8 +575,8 @@ public: #define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ - __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ - __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \ __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), 
_mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ @@ -624,13 +622,13 @@ public: fg = _mm_packus_epi16(fg_lo, fg_hi); \ } -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ - uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \ uint32_t y0 = (frac_y0 >> FRACBITS); \ uint32_t y1 = (frac_y1 >> FRACBITS); \ \ @@ -676,23 +674,21 @@ public: int yshift = (32 - ybits); \ int xmask = (1 << xshift) - 1; \ int ymask = (1 << yshift) - 1; \ - uint32_t xhalf = 1 << (xbits - 1); \ - uint32_t yhalf = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t x = (xfrac - xhalf) >> xbits; \ - uint32_t y = (yfrac - yhalf) >> ybits; \ + uint32_t x = xfrac >> xbits; \ + uint32_t y = yfrac >> ybits; \ \ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ \ - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \ + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \ \ __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 870d74894..96bb1f948 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 < 0) tx0 += mip_width; int tx1 = (tx0 + 1) % mip_width; source = (BYTE*)(pixels + tx0 * mip_height); source2 = (BYTE*)(pixels + tx1 * mip_height); height = mip_height; - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; } } } From 7a65a0f5953c33ea32ab4600064541e82603a8be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:57:27 +0200 Subject: [PATCH 84/94] Made mipmapping a little less aggressive --- src/r_segs.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 96bb1f948..2d39a6d97 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1147,11 +1147,12 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof if (r_mipmap && texture->Mipmapped()) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - int level = (int)MAX(magnitude - 1.0, 0.0); - while (level != 0) + double texture_bias = 1.7f; + double level = MAX(magnitude - 3.0, 0.0); + while (level > texture_bias) { mipmap_offset += mip_width * mip_height; - level >>= 1; + level *= 0.5f; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } From d1617fcdf08ddc4f03dca8d92195261bf7dcc4ef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:45:06 +0200 Subject: [PATCH 85/94] GCC compile fixes --- src/r_draw_rgba.h | 4 ++++ src/textures/texture.cpp | 16 
++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 27d7bd035..96e96530c 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -31,6 +31,10 @@ #include #include +#ifndef NO_SSE +#include +#endif + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index ce7874ee6..160223617 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -406,10 +406,10 @@ void FTexture::GenerateBgraMipmaps() { uint32_t c8 = PixelsBgra[x * Height + y]; Color4f c; - c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); - c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); - c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); - c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + c.a = powf(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = powf(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = powf(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = powf(BPART(c8) * (1.0f / 255.0f), 2.2f); image[x * Height + y] = c; } } @@ -489,10 +489,10 @@ void FTexture::GenerateBgraMipmaps() int h = MAX(Height >> i, 1); for (int j = 0; j < w * h; j++) { - uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); dest[j] = (a << 24) | (r << 16) | (g << 8) | b; 
} src += w * h; From 13ef9a834c45355ba70fb029c54170e44b54cb76 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:56:53 +0200 Subject: [PATCH 86/94] Compile fix for gcc/clang --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 96e96530c..c976602f6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -296,7 +296,7 @@ class DrawerBlendCommand : public CommandType public: void Execute(DrawerThread *thread) override { - LoopIterator loop(this, thread); + typename CommandType::LoopIterator loop(this, thread); if (!loop) return; BlendMode blend(*this, loop); do From b0e9adfc10ad40dde1c080ff8cb0e034e91cd069 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 4 Jul 2016 16:33:19 +0200 Subject: [PATCH 87/94] Fix single layer skies by using a cube box rather than a cylinder --- src/r_plane.cpp | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 0ede451e0..8345a83ce 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,14 +880,34 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; +// Treat sky as a cube rather than a cylinder +CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - if (!r_swtruecolor) - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + int tx; + if (r_cubesky) + { + int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; + int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); + tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); + tx %= fronttex->GetWidth(); + if (tx < 0) + tx += fronttex->GetWidth(); + } else - return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + { + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + + if (!r_swtruecolor) + return fronttex->GetColumn(tx, NULL); + else + { + return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); + } } // Get a column of sky when there are two overlapping sky textures @@ -1030,7 +1050,7 @@ static void R_DrawSky (visplane_t *pl) { // The texture does not tile nicely frontyScale *= skyscale; frontiScale = 1 / frontyScale; - R_DrawSkyStriped (pl); + //R_DrawSkyStriped (pl); } } From 19030b555f233f85334eaca0f2c8c66b91f1e577 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 6 Jul 2016 20:19:01 +0200 Subject: [PATCH 88/94] Fix sky stretching on widescreen displays --- src/r_plane.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8345a83ce..75826d328 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,21 +880,17 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -// Treat sky as a cube rather than a cylinder -CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { int tx; - if (r_cubesky) + if (r_linearsky) { - int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; - int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); - tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); - tx %= fronttex->GetWidth(); - if (tx < 0) - tx += fronttex->GetWidth(); + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + angle_t column = (skyangle + xangle) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; } else { From 21390e91b8a28c71ba44bf62ee3c7545508a74e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 21:04:45 +0200 Subject: [PATCH 89/94] Remove linear sky again --- src/r_plane.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index e25812fbd..c751fc5dc 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,30 +880,16 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - int tx; - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - angle_t column = (skyangle + xangle) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - else - { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; if (!r_swtruecolor) return fronttex->GetColumn(tx, NULL); else - { return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); - } } // Get a column of sky when there are two overlapping sky textures From 7000d0ccf9a97a01ab74853ea571d753e6e252b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 22:59:16 +0200 Subject: [PATCH 90/94] Change GetPixelsBgra to use CopyTrueColorPixels --- src/g_strife/strife_sbar.cpp | 11 --- src/menu/playerdisplay.cpp | 11 --- src/textures/jpegtexture.cpp | 109 ------------------------- src/textures/pngtexture.cpp | 151 ----------------------------------- src/textures/texture.cpp | 46 ++++++++--- src/textures/textures.h | 1 + src/textures/warptexture.cpp | 14 +++- 7 files changed, 45 insertions(+), 298 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index e1fcb3cda..eb3fa2608 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,7 +34,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -116,16 +115,6 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } -const uint32_t *FHealthBar::GetPixelsBgra() -{ - if (NeedRefresh) - { - MakeTexture(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - void FHealthBar::SetVial (int level) { if 
(level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 7b7e9ca5d..16671975a 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -247,16 +246,6 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } -const uint32_t *FBackdropTexture::GetPixelsBgra() -{ - if (LastRenderTic != gametic) - { - Render(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index f44b34d08..fc629b37e 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,7 +187,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -199,7 +198,6 @@ protected: Span DummySpans[2]; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -358,15 +356,6 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } -const uint32_t *FJPEGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // // @@ -466,104 +455,6 @@ void FJPEGTexture::MakeTexture () } } -void FJPEGTexture::MakeTextureBgra() -{ - FWadLump lump = Wads.OpenLumpNum(SourceLump); - JSAMPLE *buff = NULL; - - jpeg_decompress_struct cinfo; - jpeg_error_mgr jerr; - - CreatePixelsBgraWithMipmaps(); - - cinfo.err = jpeg_std_error(&jerr); - 
cinfo.err->output_message = JPEG_OutputMessage; - cinfo.err->error_exit = JPEG_ErrorExit; - jpeg_create_decompress(&cinfo); - try - { - FLumpSourceMgr sourcemgr(&lump, &cinfo); - jpeg_read_header(&cinfo, TRUE); - if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || - (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || - (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) - { - Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); - throw - 1; - } - - jpeg_start_decompress(&cinfo); - - int y = 0; - buff = new BYTE[cinfo.output_width * cinfo.output_components]; - - while (cinfo.output_scanline < cinfo.output_height) - { - int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); - BYTE *in = buff; - uint32_t *out = PixelsBgra.data() + y; - switch (cinfo.out_color_space) - { - case JCS_RGB: - for (int x = Width; x > 0; --x) - { - uint32_t r = in[0]; - uint32_t g = in[1]; - uint32_t b = in[2]; - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 3; - } - break; - - case JCS_GRAYSCALE: - for (int x = Width; x > 0; --x) - { - uint32_t gray = in[0]; - *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; - out += Height; - in += 1; - } - break; - - case JCS_CMYK: - // What are you doing using a CMYK image? :) - for (int x = Width; x > 0; --x) - { - // To be precise, these calculations should use 255, but - // 256 is much faster and virtually indistinguishable. - uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); - uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); - uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 4; - } - break; - - default: - // The other colorspaces were considered above and discarded, - // but GCC will complain without a default for them here. 
- break; - } - y++; - } - jpeg_finish_decompress(&cinfo); - jpeg_destroy_decompress(&cinfo); - } - catch (int) - { - Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); - jpeg_destroy_decompress(&cinfo); - } - if (buff != NULL) - { - delete[] buff; - } - - GenerateBgraMipmaps(); -} - //=========================================================================== // diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index ee4eabe90..31d76f567 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -57,7 +57,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -81,7 +80,6 @@ protected: DWORD StartOfIDAT; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -454,15 +452,6 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } -const uint32_t *FPNGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // @@ -620,146 +609,6 @@ void FPNGTexture::MakeTexture () delete lump; } -void FPNGTexture::MakeTextureBgra () -{ - FileReader *lump; - - if (SourceLump >= 0) - { - lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); - } - else - { - lump = new FileReader(SourceFile.GetChars()); - } - - CreatePixelsBgraWithMipmaps(); - if (StartOfIDAT != 0) - { - DWORD len, id; - lump->Seek (StartOfIDAT, SEEK_SET); - lump->Read(&len, 4); - lump->Read(&id, 4); - - if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ - { - std::vector src(Width*Height); - M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - - if (!PngPalette.empty()) - { - for (int x = 0; x < Width; x++) - { - for 
(int y = 0; y < Height; y++) - { - uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; - uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; - uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; - PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; - } - } - } - else - { - for (int x = 0; x < Width; x++) - { - for (int y = 0; y < Height; y++) - { - uint32_t gray = src[x + y * Width]; - PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; - } - } - } - } - else /* RGB and/or Alpha present */ - { - int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; - BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; - BYTE *in; - uint32_t *out; - int x, y, pitch, backstep; - - M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - in = tempix; - out = PixelsBgra.data(); - - // Convert from source format to paletted, column-major. - // Formats with alpha maps are reduced to only 1 bit of alpha. 
- switch (ColorType) - { - case 2: // RGB - pitch = Width * 3; - backstep = Height * pitch - 3; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - if (!HaveTrans) - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - else - { - if (in[0] == NonPaletteTrans[0] && - in[1] == NonPaletteTrans[1] && - in[2] == NonPaletteTrans[2]) - { - *out++ = 0; - } - else - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - } - in += pitch; - } - in -= backstep; - } - break; - - case 4: // Grayscale + Alpha - pitch = Width * 2; - backstep = Height * pitch - 2; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[1]; - uint32_t gray = (in[0] * alpha + 127) / 255; - *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; - in += pitch; - } - in -= backstep; - } - break; - - case 6: // RGB + Alpha - pitch = Width * 4; - backstep = Height * pitch - 4; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[3]; - uint32_t red = (in[0] * alpha + 127) / 255; - uint32_t green = (in[1] * alpha + 127) / 255; - uint32_t blue = (in[2] * alpha + 127) / 255; - *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; - in += pitch; - } - in -= backstep; - } - break; - } - delete[] tempix; - } - } - delete lump; - GenerateBgraMipmaps(); -} - //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 160223617..7dfe04b23 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -194,21 +194,15 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (PixelsBgra.empty()) + if (PixelsBgra.empty() || 
CheckModified()) { - GetColumn(0, nullptr); - const BYTE *indices = GetPixels(); - if (indices == nullptr) + if (!GetColumn(0, nullptr)) return nullptr; - CreatePixelsBgraWithMipmaps(); - for (int i = 0; i < Width * Height; i++) - { - if (indices[i] != 0) - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; - else - PixelsBgra[i] = 0; - } - GenerateBgraMipmaps(); + + FBitmap bitmap; + bitmap.Create(GetWidth(), GetHeight()); + CopyTrueColorPixels(&bitmap, 0, 0); + GenerateBgraFromBitmap(bitmap); } return PixelsBgra.data(); } @@ -356,6 +350,32 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) +{ + CreatePixelsBgraWithMipmaps(); + + // Transpose and premultiply alpha + const uint32_t *src = (const uint32_t *)bitmap.GetPixels(); + uint32_t *dest = PixelsBgra.data(); + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t p = src[x + y * Width]; + uint32_t red = RPART(p); + uint32_t green = GPART(p); + uint32_t blue = BPART(p); + uint32_t alpha = APART(p); + red = (red * alpha + 127) / 255; + green = (green * alpha + 127) / 255; + blue = (blue * alpha + 127) / 255; + dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + GenerateBgraMipmaps(); +} + void FTexture::CreatePixelsBgraWithMipmaps() { int levels = MipmapLevels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index ff1093a49..e5ecdc679 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,6 +274,7 @@ protected: std::vector PixelsBgra; + void GenerateBgraFromBitmap(const FBitmap &bitmap); void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); void GenerateBgraMipmapsFast(); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index 0d18ab58f..91c7b9fc4 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -39,6 +39,7 @@ #include "r_utility.h" #include 
"textures/textures.h" #include "warpbuffer.h" +#include "v_palette.h" FWarpTexture::FWarpTexture (FTexture *source, int warptype) @@ -96,13 +97,20 @@ const BYTE *FWarpTexture::GetPixels () const uint32_t *FWarpTexture::GetPixelsBgra() { DWORD time = r_FrameTime; - if (Pixels == NULL || time != GenTime) { MakeTexture(time); - PixelsBgra.clear(); + CreatePixelsBgraWithMipmaps(); + for (int i = 0; i < Width * Height; i++) + { + if (Pixels[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[Pixels[i]].d; + else + PixelsBgra[i] = 0; + } + GenerateBgraMipmapsFast(); } - return FTexture::GetPixelsBgra(); + return PixelsBgra.data(); } const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) From 2f512e54cdee4c1d15f632fcab64d3850004028b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 23:12:34 +0200 Subject: [PATCH 91/94] Remove unused code --- src/textures/pngtexture.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 31d76f567..9a64bac61 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,7 +41,6 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" -#include //========================================================================== // @@ -74,7 +73,6 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; - std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; @@ -268,12 +266,6 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } - for (i = 0; i < PaletteSize; i++) - { - PngPalette.push_back(p.pngpal[i][0]); - PngPalette.push_back(p.pngpal[i][1]); - PngPalette.push_back(p.pngpal[i][2]); - } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); From 3c8719f9458d4f210f27e827d89ad86234717232 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl 
Date: Mon, 8 Aug 2016 22:35:26 +0200 Subject: [PATCH 92/94] Fix buffer overflow in FTexture::GenerateBgraMipmaps --- src/textures/texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7dfe04b23..05574e9da 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -482,7 +482,7 @@ void FTexture::GenerateBgraMipmaps() if (a < 0) a = h - 1; if (a == h) a = 0; if (b < 0) b = w - 1; - if (b == h) b = 0; + if (b == w) b = 0; c = c + dest[a + b * h]; } } From abef073ea499f85337ad688e61ac8c65c3a689ac Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 9 Aug 2016 01:17:45 +0200 Subject: [PATCH 93/94] Implemented sloped planes for true color mode --- src/r_draw_rgba.cpp | 144 +++++++++++++++++++++++++++++++++++++++----- src/r_draw_rgba.h | 3 + src/r_plane.cpp | 25 +------- 3 files changed, 134 insertions(+), 38 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7a071e1d4..69ebfeb84 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2191,23 +2191,44 @@ public: class DrawTiltedSpanRGBACommand : public DrawerCommand { - int _y; int _x1; int _x2; + int _y; BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE * RESTRICT _source; + FVector3 _plane_sz; + FVector3 _plane_su; + FVector3 _plane_sv; + bool _plane_shade; + int _planeshade; + float _planelightfloat; + fixed_t _pviewx; + fixed_t _pviewy; + int _xbits; + int _ybits; + const uint32_t * RESTRICT _source; public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2) + DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - _y = y; _x1 = x1; _x2 = x2; - + _y = y; _destorg = dc_destorg; - _source = ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _plane_sz = plane_sz; + 
_plane_su = plane_su; + _plane_sv = plane_sv; + _plane_shade = plane_shade; + _planeshade = planeshade; + _planelightfloat = planelightfloat; + _pviewx = pviewx; + _pviewy = pviewy; + _source = (const uint32_t*)ds_source; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override @@ -2215,20 +2236,103 @@ public: if (thread->line_skipped_by_thread(_y)) return; - int y = _y; - int x1 = _x1; - int x2 = _x2; + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + int source_width = 1 << _xbits; + int source_height = 1 << _ybits; - uint32_t *source = (uint32_t*)_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = _x2 - _x1 + 1; - int count = x2 - x1 + 1; + // Depth (Z) change across the span + double iz = _plane_sz[2] + _plane_sz[1] * (centery - _y) + _plane_sz[0] * (_x1 - centerx); + + // Light change across the span + fixed_t lightstart = _light; + fixed_t lightend = lightstart; + if (_plane_shade) + { + double vis_start = iz * _planelightfloat; + double vis_end = (iz + _plane_sz[0] * count) * _planelightfloat; + + lightstart = LIGHTSCALE(vis_start, _planeshade); + lightend = LIGHTSCALE(vis_end, _planeshade); + } + fixed_t light = lightstart; + fixed_t steplight = (lightend - lightstart) / count; + + // Texture coordinates + double uz = _plane_su[2] + _plane_su[1] * (centery - _y) + _plane_su[0] * (_x1 - centerx); + double vz = _plane_sv[2] + _plane_sv[1] * (centery - _y) + _plane_sv[0] * (_x1 - centerx); + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep = _plane_sz[0] * SPANSIZE; + double uzstep = _plane_su[0] * SPANSIZE; + double vzstep = _plane_sv[0] * SPANSIZE; + + // Linear 
interpolate in sizes of SPANSIZE to increase speed + while (count >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)(SQWORD((endu - startu) * INVSPAN)); + uint32_t stepv = (uint32_t)(SQWORD((endv - startv) * INVSPAN)); + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + for (int i = 0; i < SPANSIZE; i++) + { + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + u += stepu; + v += stepv; + light += steplight; + } + startu = endu; + startv = endv; + count -= SPANSIZE; + } + + // The last few pixels at the end while (count > 0) { - *(dest++) = source[0]; + double endz = 1.f / iz; + startu = uz*endz; + startv = vz*endz; + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + iz += _plane_sz[0]; + uz += _plane_su[0]; + vz += _plane_sv[0]; + light += steplight; count--; } } @@ -2633,6 +2737,16 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float 
planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + static ShadeConstants slab_rgba_shade_constants; static const BYTE *slab_rgba_colormap; static fixed_t slab_rgba_light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c976602f6..083258bf0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -94,6 +94,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c751fc5dc..706d6fad7 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,23 +480,7 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_rgba (int y, int x1) { - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. 
- - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } + R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -512,12 +496,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_rgba(int y, int x1) { - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = LightBgra::calc_light_multiplier(ds_light); - uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + R_DrawColoredSpan_rgba(y, x1, spanend[y]); } //========================================================================== From f56250b9107ab0446c040aca51419a7c1cd25479 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 14 Aug 2016 05:10:34 +0200 Subject: [PATCH 94/94] Remove premultiplied alpha --- src/r_draw_rgba.h | 23 +++++++++++++++-------- src/textures/texture.cpp | 12 ++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 083258bf0..ca54f7263 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -473,9 +473,9 @@ public: { uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 uint32_t inv_alpha = 256 - alpha; - uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); - uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); - uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + uint32_t red = MIN((RPART(fg) * alpha + RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN((GPART(fg) * alpha + GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN((BPART(fg) * alpha +
BPART(bg) * inv_alpha) / 256, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } }; @@ -861,11 +861,18 @@ public: __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ - inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \ + fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ bg_hi = _mm_srli_epi16(bg_hi, 8); \ bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 05574e9da..12e9d8549 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -354,22 +354,14 @@ void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) { CreatePixelsBgraWithMipmaps(); - // Transpose and premultiply alpha + // Transpose const
uint32_t *src = (const uint32_t *)bitmap.GetPixels(); uint32_t *dest = PixelsBgra.data(); for (int x = 0; x < Width; x++) { for (int y = 0; y < Height; y++) { - uint32_t p = src[x + y * Width]; - uint32_t red = RPART(p); - uint32_t green = GPART(p); - uint32_t blue = BPART(p); - uint32_t alpha = APART(p); - red = (red * alpha + 127) / 255; - green = (green * alpha + 127) / 255; - blue = (blue * alpha + 127) / 255; - dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + dest[y + x * Height] = src[x + y * Width]; } }