From 6e53c1bd12019c63715987c0a76c79e868a05401 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 28 May 2016 20:40:33 +0200 Subject: [PATCH 001/100] Change render target output from PAL8 to BGRA8 --- src/basictypes.h | 6 + src/f_wipe.cpp | 17 +- src/m_misc.cpp | 2 + src/r_draw.cpp | 1106 ++++++++++++++++++++++++++++++-- src/r_draw.h | 12 +- src/r_drawt.cpp | 397 ++++++++++-- src/r_main.cpp | 8 +- src/r_main.h | 28 + src/r_plane.cpp | 24 +- src/r_segs.cpp | 107 ++- src/r_swrenderer.cpp | 4 + src/r_things.cpp | 73 ++- src/textures/canvastexture.cpp | 5 + src/v_draw.cpp | 45 +- src/v_video.cpp | 52 +- src/v_video.h | 14 +- src/win32/fb_d3d9.cpp | 38 +- src/win32/fb_ddraw.cpp | 10 +- src/win32/win32iface.h | 4 +- src/win32/win32video.cpp | 13 + 20 files changed, 1785 insertions(+), 180 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index ff2cd972e..45e33a4a7 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,6 +66,12 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer +#ifndef PALETTEOUTPUT +typedef uint32_t canvas_pixel_t; +#else +typedef BYTE canvas_pixel_t; +#endif + #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a3ceb8d50..c6f20cadb 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -77,8 +77,10 @@ bool wipe_initMelt (int ticks) { int i, r; +#ifdef PALETTEOUTPUT // copy start screen to main screen screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,7 +273,8 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to, *fromold, *fromnew; + canvas_pixel_t *to; + BYTE *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; @@ -298,6 +301,9 @@ bool wipe_doBurn (int ticks) } else { +#ifndef PALETTEOUTPUT + // TO DO: 
RGB32k.All +#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -305,6 +311,7 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; +#endif done = false; } } @@ -335,7 +342,9 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { +#ifdef PALETTEOUTPUT screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); +#endif return true; } else @@ -346,7 +355,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - BYTE *to = screen->GetBuffer(); + canvas_pixel_t *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -387,7 +396,9 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); +#endif return true; } return false; @@ -398,8 +409,10 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; +#ifdef PALETTEOUTPUT screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. 
+#endif // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 87f61f253..79416c31d 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,6 +655,7 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { +#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -743,6 +744,7 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } +#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 80b91ed2d..044910008 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -42,6 +42,9 @@ #include "gi.h" #include "stats.h" #include "x86.h" +#ifndef NO_SSE +#include +#endif #undef RANGECHECK @@ -61,7 +64,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE *dc_destorg; +canvas_pixel_t *dc_destorg; } int scaledviewwidth; @@ -90,6 +93,7 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +fixed_t dc_light; int dc_x; int dc_yl; int dc_yh; @@ -103,12 +107,13 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -BYTE* dc_dest; +canvas_pixel_t* dc_dest; int dc_count; DWORD vplce[4]; DWORD vince[4]; BYTE* palookupoffse[4]; +fixed_t palookuplight[4]; const BYTE* bufplce[4]; // just for profiling @@ -180,7 +185,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -193,6 +198,10 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + // Determine scaling, // which is the only mapping to be done. 
fracstep = dc_iscale; @@ -212,7 +221,11 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. - *dest = colormap[source[frac>>FRACBITS]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); +#else + *dest = colormap[source[frac >> FRACBITS]]; +#endif dest += pitch; frac += fracstep; @@ -226,7 +239,7 @@ void R_DrawColumnP_C (void) void R_FillColumnP (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; count = dc_count; @@ -235,13 +248,21 @@ void R_FillColumnP (void) dest = dc_dest; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + { int pitch = dc_pitch; BYTE color = dc_color; do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(color, light); +#else *dest = color; +#endif dest += pitch; } while (--count); } @@ -250,19 +271,39 @@ void R_FillColumnP (void) void R_FillAddColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -271,25 +312,45 @@ void R_FillAddColumn (void) *dest = RGB32k.All[bg & (bg>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillAddClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -304,25 +365,45 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; /* strip the whole 256 bias so each channel stays within 0..255 */ + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; - int pitch = dc_pitch; do { @@ -336,25 +417,45 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } void R_FillRevSubClampColumn (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; count = dc_count; if (count <= 0) return; dest = dc_dest; + int 
pitch = dc_pitch; + +#ifndef PALETTEOUTPUT + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; /* strip the whole 256 bias so each channel stays within 0..255 */ + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); +#else DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; - int pitch = dc_pitch; do { @@ -368,7 +469,7 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); - +#endif } // @@ -421,7 +522,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -441,6 +542,85 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; +#ifndef PALETTEOUTPUT + + // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) + // I'm not sure if this is really always the case or not. + + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. 
+ if (fuzz) + { + cnt = MIN(FUZZTABLE - fuzz, count); + count -= cnt; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + uint32_t bg = dest[fuzzoffset[fuzz++]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } + +#else + // colormap #6 is used for shading (of 0-31, a bit brighter than average) { // [RH] Make local copies of global vars to try and improve @@ -487,6 +667,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } +#endif } #endif @@ -539,7 +720,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -552,6 +733,34 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + int pitch = dc_pitch; + BYTE *colormap = dc_colormap; + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -572,6 +781,7 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // @@ -585,7 +795,7 @@ void R_DrawAddColumnP_C (void) void R_DrawTranslatedColumnP_C (void) { int count; - BYTE* dest; + canvas_pixel_t* dest; fixed_t frac; fixed_t fracstep; @@ -593,6 +803,10 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; @@ -607,7 +821,11 @@ void R_DrawTranslatedColumnP_C (void) do { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); +#else *dest = colormap[translation[source[frac>>FRACBITS]]]; +#endif dest += pitch; frac += fracstep; @@ -619,7 +837,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTlatedAddColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -627,11 +845,44 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 
0) return; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -647,12 +898,13 @@ void R_DrawTlatedAddColumnP_C (void) fg = fg2rgb[fg]; bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; dest += pitch; frac += fracstep; } while (--count); } +#endif } // Draw a column whose "color" values are actually translucency @@ -660,7 +912,7 @@ void R_DrawTlatedAddColumnP_C (void) void R_DrawShadedColumnP_C (void) { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -673,6 +925,36 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + + do + { + DWORD alpha = clamp(colormap[source[frac >> 
FRACBITS]], 0, 64); + DWORD inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -690,13 +972,14 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } +#endif } // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -709,6 +992,34 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -731,13 +1042,14 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { int count; - 
BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -750,6 +1062,35 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -773,13 +1114,14 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -792,6 +1134,34 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 
256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -813,13 +1183,14 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -832,6 +1203,35 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -854,13 +1254,14 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t 
fracstep; @@ -873,6 +1274,34 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -894,13 +1323,14 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } +#endif } // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { int count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t frac; fixed_t fracstep; @@ -913,6 +1343,35 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; +#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red 
- bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } +#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -935,6 +1394,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } +#endif } @@ -967,6 +1427,7 @@ int ds_x1; int ds_x2; lighttable_t* ds_colormap; +//dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1019,6 +1480,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { ds_colormap = colormap; + ds_light = 0; #ifdef X86_ASM if (ds_colormap != ds_curcolormap) { @@ -1062,7 +1524,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1087,9 +1549,64 @@ void R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + int sse_count = count / 4; + count -= sse_count * 4; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = colormap[source[spot]]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); /* arguments run from lane 3 down to lane 0, so p0 is the first pixel stored */ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); /* first operand fills the low 8 bytes, keeping pixels in p0..p3 order */ + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + if (count == 0) + return; +#endif +#endif + do { // Current texture index in u,v. @@ -1097,7 +1614,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. xfrac += xstep; @@ -1117,7 +1638,11 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. +#ifndef PALETTEOUTPUT + *dest++ = shade_pal_index(colormap[source[spot]], light); +#else *dest++ = colormap[source[spot]]; +#endif // Next step in u,v. 
xfrac += xstep; @@ -1133,12 +1658,16 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; int spot; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(ds_light); +#endif + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1160,7 +1689,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1180,7 +1713,11 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[texdata], light); +#else *dest = colormap[texdata]; +#endif } dest++; xfrac += xstep; @@ -1196,7 +1733,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1214,9 +1751,35 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
+#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); @@ -1229,9 +1792,37 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } else { +#ifndef PALETTEOUTPUT + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); +#else BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1247,6 +1838,7 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); +#endif } } @@ -1256,7 +1848,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* 
dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1264,6 +1856,8 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1285,12 +1879,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1310,12 +1921,29 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; +#endif } dest++; xfrac += xstep; @@ -1330,7 +1958,7 @@ void 
R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1338,6 +1966,8 @@ void R_DrawSpanAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1354,6 +1984,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1363,6 +2010,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1375,6 +2024,23 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a 
= fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1384,6 +2050,8 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; +#endif + xfrac += xstep; yfrac += ystep; } while (--count); @@ -1396,7 +2064,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - BYTE* dest; + canvas_pixel_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1404,6 +2072,8 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(ds_light); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1425,6 +2095,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); /* additive blend: saturate each channel at 255, same as R_DrawSpanAddClampP_C */ + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1434,6 +2120,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1453,6 +2140,22 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red +
bg_red, 0, 255); /* additive blend: saturate each channel at 255, same as R_DrawSpanAddClampP_C */ + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -1462,6 +2165,7 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } dest++; xfrac += xstep; @@ -1473,7 +2177,16 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); +#endif } // Draw a voxel slab @@ -1492,7 +2205,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) { int x; const BYTE *colormap = slabcolormap; @@ -1666,13 +2379,21 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { - *dest = colormap[source[frac>>bits]]; +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[source[frac>>bits]], light); +#else + *dest = colormap[source[frac >> bits]]; +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1682,19 +2403,83 @@ DWORD vlinec1 () void vlinec4 () { - BYTE *dest =
dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#ifndef NO_SSE + __m128i mlight_hi = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); /* 16-bit multipliers for pixels 2..3 (B,G,R scaled by light, alpha by 256); _mm_set_* lists lanes high-to-low */ + __m128i mlight_lo = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); /* same for pixels 0..1 */ + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; +#endif +#endif + do { +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; + BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; + BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; + BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); /* lowest lane = column 0 so the store below lands in dest[0] */ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); /* first operand fills the low 64 bits: keep pixels 0..1 before 2..3 */ + _mm_storeu_si128((__m128i*)dest, fg); + +#else + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] =
shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; +#endif +#else dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); + +#ifndef PALETTEOUTPUT +#ifndef NO_SSE + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +#endif +#endif } #endif @@ -1717,16 +2502,24 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[pix], light); +#else *dest = colormap[pix]; +#endif } frac += fracstep; dest += pitch; @@ -1737,19 +2530,33 @@ DWORD mvlinec1 () void mvlinec4 () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; +#ifndef PALETTEOUTPUT + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); +#endif + do { BYTE pix; +#ifndef PALETTEOUTPUT + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; +#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; +#endif dest += dc_pitch; } while (--count); } @@ -1763,7 +2570,11 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif for (; y < y2; ++y) { @@ -1771,7 +2582,11 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) int x = x1; do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); dest += dc_pitch; } @@ -1781,10 +2596,19 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - BYTE *dest = ylookup[y] + dc_destorg; + 
canvas_pixel_t *dest = ylookup[y] + dc_destorg; + +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + do { +#ifndef PALETTEOUTPUT + dest[x] = shade_pal_index(colormap[dest[x]], light); +#else dest[x] = colormap[dest[x]]; +#endif } while (++x <= x2); } @@ -1809,6 +2633,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; for (--x; x >= x1; --x) { @@ -1834,6 +2659,7 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } rcolormap = lcolormap; dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; } else { @@ -1891,15 +2717,37 @@ fixed_t tmvline1_add () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { +#ifndef PALETTEOUTPUT + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } +#else BYTE pix = source[frac>>bits]; if (pix != 0) { @@ -1908,6 +2756,7 @@ fixed_t tmvline1_add () fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; } +#endif frac += fracstep; dest += pitch; } while (--count); @@ -1917,13 +2766,19 @@ fixed_t tmvline1_add () void tmvline4_add () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = 
dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1931,10 +2786,27 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; /* blend against this column's own pixel (dest[i]), not column 0 */ + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD fg = fg2rgb[palookupoffse[i][pix]]; DWORD bg = bg2rgb[dest[i]]; fg = (fg+bg) | 0x1f07c1f; dest[i] = RGB32k.All[fg & (fg>>15)]; +#endif } vplce[i] += vince[i]; } @@ -1949,18 +2821,36 @@ fixed_t tmvline1_addclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green +
bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; DWORD b = a; @@ -1970,6 +2860,7 @@ fixed_t tmvline1_addclamp () b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -1980,13 +2871,19 @@ fixed_t tmvline1_addclamp () void tmvline4_addclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -1994,6 +2891,22 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; DWORD b = a; @@ -2003,6 +2916,7 @@ void tmvline4_addclamp () b = b - (b >> 5); a |= b; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2017,18 +2931,36 @@ fixed_t tmvline1_subclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD 
*fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -2037,6 +2969,7 @@ fixed_t tmvline1_subclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2047,13 +2980,19 @@ fixed_t tmvline1_subclamp () void tmvline4_subclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2061,6 +3000,22 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 
255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; DWORD b = a; @@ -2069,6 +3024,7 @@ void tmvline4_subclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2083,18 +3039,36 @@ fixed_t tmvline1_revsubclamp () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); + do { BYTE pix = source[frac>>bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; DWORD b = a; @@ -2103,6 +3077,7 @@ fixed_t tmvline1_revsubclamp () a &= b; a |= 0x01f07c1f; *dest = RGB32k.All[a & (a>>15)]; +#endif } frac += fracstep; dest += pitch; @@ -2113,13 +3088,19 @@ fixed_t tmvline1_revsubclamp () void tmvline4_revsubclamp () { - BYTE *dest = dc_dest; + canvas_pixel_t *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + uint32_t light[4]; + light[0] = 
calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + do { for (int i = 0; i < 4; ++i) @@ -2127,6 +3108,22 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; +#else DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; DWORD b = a; @@ -2135,6 +3132,7 @@ void tmvline4_revsubclamp () a &= b; a |= 0x01f07c1f; dest[i] = RGB32k.All[a & (a>>15)]; +#endif } vplce[i] += vince[i]; } @@ -2418,6 +3416,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { dc_colormap += fixedlightlev; } + dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -2443,6 +3442,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHorizP; dc_colormap = identitymap; + dc_light = 0; } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index cb2f68f33..6f7a91154 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -30,6 +30,7 @@ extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" fixed_t dc_light; extern "C" int dc_x; extern "C" int dc_yl; extern "C" int dc_yh; @@ -44,16 +45,17 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE *dc_dest, *dc_destorg; +extern "C" canvas_pixel_t *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; +extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" canvas_pixel_t *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -184,7 +186,7 @@ extern void (*rt_map4cols)(int sx, int yl, int yh); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (BYTE *buffer=NULL); +void rt_initcols (canvas_pixel_t *buffer=NULL); void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); @@ -231,13 +233,15 @@ void R_FillSpan (void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; extern "C" lighttable_t* ds_colormap; +//extern "C" dsfixed_t ds_light; +#define ds_light dc_light extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e8faff0ce..f5fc027b5 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -57,8 +57,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -BYTE dc_tempbuff[MAXHEIGHT*4]; -BYTE *dc_temp; +canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +73,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. void rt_copy1col_c (int hx, int sx, int yl, int yh) { - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -114,6 +114,13 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. 
void rt_copy4cols_c (int sx, int yl, int yh) { +#ifndef PALETTEOUTPUT + // To do: we could do this with SSE using __m128i + rt_copy1col_c(0, sx, yl, yh); + rt_copy1col_c(1, sx + 1, yl, yh); + rt_copy1col_c(2, sx + 2, yl, yh); + rt_copy1col_c(3, sx + 3, yl, yh); +#else int *source; int *dest; int count; @@ -142,14 +149,15 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); +#endif } // Maps one span at hx to the screen at sx. void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -158,13 +166,21 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { +#ifndef PALETTEOUTPUT + *dest = shade_pal_index(colormap[*source], light); +#else *dest = colormap[*source]; +#endif source += 4; dest += pitch; } @@ -172,8 +188,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); +#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -183,8 +204,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -193,16 +214,27 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); +#endif + colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { +#ifndef 
PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; +#endif source += 4; dest += pitch; } @@ -210,6 +242,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { +#ifndef PALETTEOUTPUT + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); +#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -218,6 +260,7 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; +#endif source += 8; dest += pitch*2; } while (--count); @@ -227,7 +270,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4 + hx]; + canvas_pixel_t *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -274,7 +317,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - BYTE *source = &dc_temp[yl*4]; + canvas_pixel_t *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +373,8 @@ void rt_tlate4cols (int sx, int yl, int yh) void rt_add1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -340,13 +383,36 @@ void rt_add1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -358,14 +424,15 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -374,13 +441,40 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -414,6 +508,7 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -433,10 +528,9 @@ void rt_tlateadd4cols (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col (int hx, int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -445,12 +539,37 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -459,15 +578,15 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_c (int sx, int yl, int yh) { - DWORD *fgstart; BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -476,12 +595,40 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; - fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fgstart; + fgstart = &Col2RGB8[0][dc_color]; + do { DWORD val; @@ -504,14 +651,15 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -520,13 +668,36 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -540,14 +711,15 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -556,13 +728,39 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + do { DWORD a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; DWORD b = a; @@ -604,6 +802,7 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. 
@@ -624,8 +823,8 @@ void rt_tlateaddclamp4cols (int sx, int yl, int yh) void rt_subclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -634,13 +833,35 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -653,14 +874,15 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -669,13 +891,39 @@ void rt_subclamp4cols (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; @@ -713,6 +961,7 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. 
@@ -733,8 +982,8 @@ void rt_tlatesubclamp4cols (int sx, int yl, int yh) void rt_revsubclamp1col (int hx, int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -750,6 +999,28 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -762,14 +1033,15 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols (int sx, int yl, int yh) { BYTE *colormap; - BYTE *source; - BYTE *dest; + canvas_pixel_t *source; + canvas_pixel_t *dest; int count; int pitch; @@ -785,6 +1057,32 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; +#ifndef PALETTEOUTPUT + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -822,6 +1120,7 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1002,7 +1301,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (BYTE *buff) +void rt_initcols (canvas_pixel_t *buff) { int y; @@ -1016,7 +1315,7 @@ void rt_initcols (BYTE *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - BYTE *dest; + canvas_pixel_t *dest; fixed_t fracstep; fixed_t frac; @@ -1077,7 +1376,7 @@ void R_FillColumnHorizP (void) { int count = dc_count; BYTE color = dc_color; - BYTE *dest; + canvas_pixel_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index ce4841a2e..04e798981 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -578,7 +578,7 @@ void R_HighlightPortal (PortalDrawseg* pds) BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - BYTE* pixels = RenderTarget->GetBuffer(); + canvas_pixel_t* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -623,7 +623,7 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; for (int y = Ytop; y <= Ybottom; y++) { @@ -794,10 +794,10 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static BYTE *lastbuff = NULL; + static canvas_pixel_t *lastbuff = NULL; int pitch = RenderTarget->GetPitch(); - BYTE *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_main.h b/src/r_main.h index 24103393d..37a41a763 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,6 +82,34 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) +// Calculate the light multiplier for ds_light +// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap +#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) + +#ifndef PALETTEOUTPUT + +// calculates the light constant passed to the shade_pal_index function +inline uint32_t calc_light_multiplier(dsfixed_t light) +{ + // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? + return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +#endif + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index d749319e3..b385302e5 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,8 +227,14 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. 
+#ifndef PALETTEOUTPUT + ds_colormap = basecolormap->Maps; + ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); +#else ds_colormap = basecolormap->Maps + (GETPALOOKUP ( GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); + ds_light = 0; +#endif } #ifdef X86_ASM @@ -360,7 +366,7 @@ void R_MapTiltedPlane (int y, int x1) int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - BYTE *fb; + canvas_pixel_t *fb; DWORD u, v; int i; @@ -393,6 +399,7 @@ void R_MapTiltedPlane (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; ds_colormap = tiltlighting[i]; + ds_light = 0; fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -486,7 +493,16 @@ void R_MapTiltedPlane (int y, int x1) void R_MapColoredPlane (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); +#ifndef PALETTEOUTPUT + canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + int count = (spanend[y] - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; +#else + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); +#endif } //========================================================================== @@ -1462,11 +1478,13 @@ void R_DrawSkyPlane (visplane_t *pl) if (fixedcolormap) { dc_colormap = fixedcolormap; + dc_light = 0; } else { fakefixed = true; fixedcolormap = dc_colormap = NormalLight.Maps; + dc_light = 0; } R_DrawSky (pl); @@ -1547,6 +1565,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) @@ -1707,6 +1726,7 @@ void R_DrawTiltedPlane (visplane_t *pl, 
double _xscale, double _yscale, fixed_t if (pl->height.fC() > 0) planelightfloat = -planelightfloat; + ds_light = 0; if (fixedlightlev >= 0) ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; else if (fixedcolormap) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 4eb3cb440..1cdb78555 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -178,6 +178,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText if (fixedcolormap == NULL && fixedlightlev < 0) { dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -316,6 +317,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // find positioning texheight = tex->GetScaledHeightDouble(); @@ -633,6 +635,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1066,10 +1069,11 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1117,6 +1121,10 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + 
palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } for(; (x < x2) && (x & 3); ++x) @@ -1130,7 +1138,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1170,7 +1184,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else + palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; +#endif } } @@ -1183,7 +1203,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } bad >>= 1; } @@ -1194,7 +1214,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); } } @@ -1205,12 +1225,12 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l dovline4(); } - BYTE *i = x+ylookup[d4]+dc_destorg; + canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; for (z = 0; z < 4; ++z) { if
(y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); } } } @@ -1225,7 +1245,13 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1416,10 +1442,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = 0; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1431,7 +1458,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1471,7 +1498,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[3] = dc_colormap; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1481,6 +1508,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + 
xoffset) >> FRACBITS); @@ -1553,7 +1581,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) @@ -1572,6 +1600,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1589,10 +1618,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) { dc_iscale = vince; dc_colormap = colormap; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1605,7 +1635,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - BYTE *p; + canvas_pixel_t *p; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1645,9 +1675,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && ((size_t)p & 3); ++x, ++p) + for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1656,7 +1690,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting +#ifndef PALETTEOUTPUT 
+ dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1694,7 +1734,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; +#ifndef PALETTEOUTPUT + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); +#else palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); +#endif } } @@ -1707,7 +1752,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); tmvline1(); } bad >>= 1; @@ -1719,7 +1764,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); vplce[z] = tmvline1(); } } @@ -1731,12 +1776,12 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline4(); } - BYTE *i = p+ylookup[d4]; + canvas_pixel_t *i = p+ylookup[d4]; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); tmvline1(); } } @@ -1750,7 +1795,13 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); 
+#ifndef PALETTEOUTPUT + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, wallshade); +#else + dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1791,6 +1842,7 @@ void R_RenderSegLoop () dc_colormap = basecolormap->Maps + fixedlightlev; else if (fixedcolormap != NULL) dc_colormap = fixedcolormap; + dc_light = 0; // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3194,6 +3246,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) @@ -3242,7 +3295,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3252,7 +3311,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -3267,7 +3332,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, wallshade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); + dc_light = 0; +#endif } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; 
diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 07edf25e9..433007acb 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -182,6 +182,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { +#ifdef PALETTEOUTPUT DCanvas *pic = new DSimpleCanvas (width, height); PalEntry palette[256]; @@ -195,6 +196,7 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; +#endif } //=========================================================================== @@ -311,6 +313,7 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { +#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -334,6 +337,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; +#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 427e61b06..0e55b45f9 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -132,7 +132,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -BYTE *OffscreenColorBuffer; +canvas_pixel_t *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -408,6 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) fixed_t centeryfrac = FLOAT2FIXED(CenterY); dc_colormap = vis->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -544,6 +545,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_colormap = usecolormap->Maps; else calclighting = true; + dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -592,7 +594,13 @@ void 
R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -603,7 +611,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } rt_initcols(); for (int zz = 4; zz; --zz) @@ -619,7 +633,13 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting +#ifndef PALETTEOUTPUT + dc_colormap = usecolormap->Maps; + dc_light = LIGHTSCALE(rw_light, shade); +#else dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); + dc_light = FLOAT2FIXED(MAXLIGHTVIS); +#endif } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -654,6 +674,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Do setup for blending. 
dc_colormap = spr->Style.colormap; + dc_light = 0; mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -2598,10 +2619,8 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle (vissprite_t *vis) { - DWORD *bg2rgb; int spacing; - BYTE *dest; - DWORD fg; + canvas_pixel_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2610,6 +2629,47 @@ void R_DrawParticle (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); +#ifndef PALETTEOUTPUT + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = RenderTarget->GetPitch(); + + for (int x = x1; x < (x1 + countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +#else + DWORD *bg2rgb; + DWORD fg; + // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; @@ -2659,6 +2719,7 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } +#endif } extern double BaseYaspectMul;; @@ -3189,12 +3250,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if
(OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new BYTE[width * height]; + OffscreenColorBuffer = new canvas_pixel_t[width * height]; } } OffscreenBufferWidth = width; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 062c3af1d..7388c1306 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,6 +106,10 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); +#ifndef PALETTEOUTPUT + Pixels = new BYTE[Width*Height]; + bPixelsAllocated = true; +#else if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -116,6 +120,7 @@ void FCanvasTexture::MakeTexture () Pixels = Canvas->GetBuffer(); bPixelsAllocated = false; } +#endif // Draw a special "unrendered" initial texture into the buffer. 
memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c7b62b0a6..fd14b5e0a 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -166,16 +166,18 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { dc_colormap = (lighttable_t *)translation; + dc_light = 0; } else { dc_colormap = identitymap; + dc_light = 0; } fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - BYTE *destorgsave = dc_destorg; + canvas_pixel_t *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1015,13 +1017,32 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - BYTE *spot = GetBuffer() + oldyyshifted + xx; +#ifndef PALETTEOUTPUT + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + + uint32_t red = (fg_red * (63 - level) + bg_red * (1 + level)) / 64; + uint32_t green = (fg_green * (63 - level) + bg_green * (1 + level)) / 64; + uint32_t blue = (fg_blue * (63 - level) + bg_blue * (1 + level)) / 64; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; +#else + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; DWORD bg = bg2rgb[*spot]; bg = (fg+bg) | 0x1f07c1f; *spot = RGB32k.All[bg&(bg>>15)]; +#endif } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) @@ -1069,7 +1090,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; +
canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch (); do { @@ -1079,7 +1100,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. - BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { @@ -1205,7 +1226,7 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - BYTE *dest; + canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1426,11 +1447,11 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const { int srcpitch = _width; int destpitch; - BYTE *dest; + canvas_pixel_t *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1442,7 +1463,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) do { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += srcpitch; dest += destpitch; } while (--_height); @@ -1452,9 +1473,9 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // V_GetBlock // Gets a linear block of pixels from the view buffer. 
// -void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const { - const BYTE *src; + const canvas_pixel_t *src; #ifdef RANGECHECK if (x<0 @@ -1470,14 +1491,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const while (_height--) { - memcpy (dest, src, _width); + memcpy (dest, src, _width * sizeof(canvas_pixel_t)); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. -bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 01a73950b..b6a626753 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -343,10 +343,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) if (damount == 0.f) return; - DWORD *bg2rgb; - DWORD fg; int gap; - BYTE *spot; + canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -366,6 +364,43 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } + spot = Buffer + x1 + y1*Pitch; + gap = Pitch - w; + +#ifndef PALETTEOUTPUT + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = 
(fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; + } +#else + DWORD *bg2rgb; + DWORD fg; + { int amount; @@ -377,8 +412,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) (((color.b * amount) >> 4) << 10); } - spot = Buffer + x1 + y1*Pitch; - gap = Pitch - w; for (y = h; y != 0; y--) { for (x = w; x != 0; x--) @@ -392,6 +425,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } spot += gap; } +#endif } //========================================================================== @@ -403,7 +437,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); @@ -759,8 +793,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height]; - memset (MemBuffer, 0, Pitch * height); + MemBuffer = new canvas_pixel_t[Pitch * height]; + memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); } //========================================================================== @@ -879,7 +913,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - BYTE *buffer = GetBuffer(); + canvas_pixel_t *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df..27c09ee36 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline BYTE *GetBuffer () const { return Buffer; } + inline canvas_pixel_t *GetBuffer () const 
{ return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; + virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. 
virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - BYTE *Buffer; + canvas_pixel_t *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - BYTE *MemBuffer; + canvas_pixel_t *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index efdced151..14a78d4cd 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -765,14 +765,20 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) +#ifndef PALETTEOUTPUT + D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; +#else + D3DFORMAT FBFormat = D3DFMT_L8; +#endif + + if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { int pow2width, pow2height, i; for (i = 1; i < Width; i <<= 1) {} pow2width = i; for (i = 1; i < Height; i <<= 1) {} pow2height = i; - if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, D3DFMT_L8, D3DPOOL_DEFAULT, &FBTexture, NULL))) + if (FAILED(D3DDevice->CreateTexture(pow2width, pow2height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { return false; } @@ -1304,18 +1310,18 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, 
&lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch && Pitch == Width) + if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) { - memcpy (lockrect.pBits, MemBuffer, Width * Height); + memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); } else { - BYTE *dest = (BYTE *)lockrect.pBits; - BYTE *src = MemBuffer; + canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; + canvas_pixel_t *src = MemBuffer; for (int y = 0; y < Height; y++) { - memcpy (dest, src, Width); - dest += lockrect.Pitch; + memcpy (dest, src, Width * sizeof(canvas_pixel_t)); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); src += Pitch; } } @@ -1349,7 +1355,11 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif if (copy3d) { FBVERTEX verts[4]; @@ -1367,7 +1377,11 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_SpecialColormap]); +#else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); +#endif } } else @@ -1378,7 +1392,11 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } +#ifndef PALETTEOUTPUT + SetPixelShader(Shaders[SHADER_NormalColor]); +#else SetPixelShader(Shaders[SHADER_NormalColorPal]); +#endif } //========================================================================== @@ -1707,7 +1725,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2) 
// //========================================================================== -void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1733,7 +1751,7 @@ void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_ } else { - buffer = (const BYTE *)lrect.pBits; + buffer = (const canvas_pixel_t *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 7cc603786..9be571f98 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -32,7 +32,6 @@ ** */ - // HEADER FILES ------------------------------------------------------------ #define DIRECTDRAW_VERSION 0x0300 @@ -61,7 +60,9 @@ // TYPES ------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) +#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -119,6 +120,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- +#ifdef USE_OBSOLETE_DDRAW + DDrawFB::DDrawFB (int width, int height, bool fullscreen) : BaseWinFB (width, height) { @@ -996,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (BYTE *)desc.lpSurface; - Pitch = desc.lPitch; + Buffer = (canvas_pixel_t *)desc.lpSurface; + Pitch = desc.lPitch / sizeof(canvas_pixel_t); BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } @@ -1327,6 +1330,7 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } +#endif ADD_STAT (blit) { diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 9b2754eae..73a2c6966 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -142,6 +142,7 @@ protected: BaseWinFB() {} }; +#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -223,6 +224,7 @@ private: DDrawFB() {} }; +#endif class D3DFB : public BaseWinFB { @@ -250,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 29bb905fb..3f3645d0b 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,8 +221,15 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); +#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + AddMode(1920, 1080, 8, 1440, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(2560, 1440, 8, 1440, 0); // 27" classic + AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k +#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); +#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); @@ -660,6 +667,10 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } +#ifndef USE_OBSOLETE_DDRAW + fb = new D3DFB(m_Adapter, width, height, fullscreen); + LOG1("New fb created @ %p\n", fb); +#else if (D3D != NULL) { fb = new D3DFB (m_Adapter, width, height, fullscreen); @@ -668,6 +679,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr { fb = new DDrawFB (width, height, fullscreen); } + LOG1 ("New fb created @ %p\n", fb); // If we could not create the framebuffer, try again with slightly @@ -729,6 +741,7 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); } retry = 0; +#endif fb->SetFlash (flashColor, flashAmount); return fb; From 8aabc26cd94018238ed606b81b3d49fabbe429fd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 May 2016 05:52:15 +0200 Subject: [PATCH 002/100] Created standalone rgba drawing functions --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2470 ++++++++++++++++++++++++++++++------------ src/r_draw.h | 165 ++- src/r_drawt.cpp | 332 +----- src/r_drawt_rgba.cpp | 883 +++++++++++++++ src/r_main.cpp | 4 +- src/r_plane.cpp | 11 +- src/r_plane.h | 4 + src/r_segs.cpp | 4 +- src/r_things.cpp | 97 +- src/r_things.h | 5 +- src/v_draw.cpp | 2 +- 12 files changed, 2896 insertions(+), 1082 deletions(-) create mode 100644 src/r_drawt_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84d6f06b9..c90756b5d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -883,6 +883,7 @@ set( FASTMATH_PCH_SOURCES r_bsp.cpp r_draw.cpp r_drawt.cpp + r_drawt_rgba.cpp r_main.cpp r_plane.cpp r_segs.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 044910008..d2b694f05 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -38,6 +38,7 @@ #include "r_data/r_translate.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_plane.h" 
#include "gi.h" #include "stats.h" @@ -73,6 +74,19 @@ int scaledviewwidth; // screen depth and asm/no asm. void (*R_DrawColumnHoriz)(void); void (*R_DrawColumn)(void); +void (*R_FillColumn)(void); +void (*R_FillAddColumn)(void); +void (*R_FillAddClampColumn)(void); +void (*R_FillSubClampColumn)(void); +void (*R_FillRevSubClampColumn)(void); +void (*R_DrawAddColumn)(void); +void (*R_DrawTlatedAddColumn)(void); +void (*R_DrawAddClampColumn)(void); +void (*R_DrawAddClampTranslatedColumn)(void); +void (*R_DrawSubClampColumn)(void); +void (*R_DrawSubClampTranslatedColumn)(void); +void (*R_DrawRevSubClampColumn)(void); +void (*R_DrawRevSubClampTranslatedColumn)(void); void (*R_DrawFuzzColumn)(void); void (*R_DrawTranslatedColumn)(void); void (*R_DrawShadedColumn)(void); @@ -82,7 +96,44 @@ void (*R_DrawSpanTranslucent)(void); void (*R_DrawSpanMaskedTranslucent)(void); void (*R_DrawSpanAddClamp)(void); void (*R_DrawSpanMaskedAddClamp)(void); -void (*rt_map4cols)(int,int,int); +void (*R_FillSpan)(void); +void (*R_FillColumnHoriz)(void); +void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapColoredPlane)(int y, int x1); +void (*R_DrawParticle)(vissprite_t *); +fixed_t (*tmvline1_add)(); +void (*tmvline4_add)(); +fixed_t (*tmvline1_addclamp)(); +void (*tmvline4_addclamp)(); +fixed_t (*tmvline1_subclamp)(); +void (*tmvline4_subclamp)(); +fixed_t (*tmvline1_revsubclamp)(); +void (*tmvline4_revsubclamp)(); +void (*rt_copy1col)(int hx, int sx, int yl, int yh); +void (*rt_copy4cols)(int sx, int yl, int yh); +void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +void (*rt_shaded4cols)(int sx, int yl, int yh); +void (*rt_map1col)(int hx, int sx, int yl, int yh); +void (*rt_add1col)(int hx, int sx, int yl, int yh); +void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +void 
(*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); +void (*rt_map4cols)(int sx, int yl, int yh); +void (*rt_add4cols)(int sx, int yl, int yh); +void (*rt_addclamp4cols)(int sx, int yl, int yh); +void (*rt_subclamp4cols)(int sx, int yl, int yh); +void (*rt_revsubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlate4cols)(int sx, int yl, int yh); +void (*rt_tlateadd4cols)(int sx, int yl, int yh); +void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); +void (*rt_initcols)(canvas_pixel_t *buffer); // // R_DrawColumn @@ -198,10 +249,6 @@ void R_DrawColumnP_C (void) // Framebuffer destination address. dest = dc_dest; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - // Determine scaling, // which is the only mapping to be done. fracstep = dc_iscale; @@ -221,11 +268,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>FRACBITS]], light); -#else *dest = colormap[source[frac >> FRACBITS]]; -#endif dest += pitch; frac += fracstep; @@ -235,8 +278,78 @@ void R_DrawColumnP_C (void) } #endif +void R_DrawColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + + // Determine scaling, + // which is the only mapping to be done. 
+ fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. + do + { + // Re-map color indices from wall texture column + // using a lighting/special effects LUT. + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + + dest += pitch; + frac += fracstep; + + } while (--count); + } +} + // [RH] Just fills a column with a color -void R_FillColumnP (void) +void R_FillColumnP_C (void) +{ + int count; + canvas_pixel_t* dest; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + { + int pitch = dc_pitch; + BYTE color = dc_color; + + do + { + *dest = color; + dest += pitch; + } while (--count); + } +} + +void R_FillColumnP_RGBA() { int count; canvas_pixel_t* dest; @@ -248,9 +361,7 @@ void R_FillColumnP (void) dest = dc_dest; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif { int pitch = dc_pitch; @@ -258,17 +369,40 @@ void R_FillColumnP (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(color, light); -#else - *dest = color; -#endif dest += pitch; } while (--count); } } -void R_FillAddColumn (void) +void R_FillAddColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + DWORD *bg2rgb; + DWORD fg; + + bg2rgb = dc_destblend; + fg = dc_srccolor; + + do + { + DWORD bg; + bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg>>15)]; + dest += pitch; + } while (--count); +} + +void R_FillAddColumn_RGBA_C() { int count; canvas_pixel_t *dest; @@ -280,7 +414,6 @@ void R_FillAddColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor 
>> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -298,24 +431,9 @@ void R_FillAddColumn (void) *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else - DWORD *bg2rgb; - DWORD fg; - - bg2rgb = dc_destblend; - fg = dc_srccolor; - - do - { - DWORD bg; - bg = (fg + bg2rgb[*dest]) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += pitch; - } while (--count); -#endif } -void R_FillAddClampColumn (void) +void R_FillAddClampColumn_C (void) { int count; canvas_pixel_t *dest; @@ -327,25 +445,6 @@ void R_FillAddClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -#else DWORD *bg2rgb; DWORD fg; @@ -365,10 +464,9 @@ void R_FillAddClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillSubClampColumn (void) +void R_FillAddClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -380,7 +478,6 @@ void R_FillSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -391,14 +488,27 @@ void R_FillSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + 
bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -417,10 +527,9 @@ void R_FillSubClampColumn (void) *dest = RGB32k.All[a & (a>>15)]; dest += pitch; } while (--count); -#endif } -void R_FillRevSubClampColumn (void) +void R_FillSubClampColumn_RGBA() { int count; canvas_pixel_t *dest; @@ -432,7 +541,6 @@ void R_FillRevSubClampColumn (void) dest = dc_dest; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; @@ -443,14 +551,27 @@ void R_FillRevSubClampColumn (void) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; } while (--count); -#else +} + +void R_FillRevSubClampColumn_C (void) +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + DWORD *bg2rgb; DWORD fg; @@ -469,7 +590,37 @@ void R_FillRevSubClampColumn (void) *dest = RGB32k.All[a 
& (a>>15)]; dest += pitch; } while (--count); -#endif +} + +void R_FillRevSubClampColumn_RGBA() +{ + int count; + canvas_pixel_t *dest; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + int pitch = dc_pitch; + + uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; + uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; + uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + + do + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + } while (--count); } // @@ -542,7 +693,77 @@ void R_DrawFuzzColumnP_C (void) dest = ylookup[dc_yl] + dc_x + dc_destorg; -#ifndef PALETTEOUTPUT + // colormap #6 is used for shading (of 0-31, a bit brighter than average) + { + // [RH] Make local copies of global vars to try and improve + // the optimizations made by the compiler. + int pitch = dc_pitch; + int fuzz = fuzzpos; + int cnt; + BYTE *map = &NormalLight.Maps[6*256]; + + // [RH] Split this into three separate loops to minimize + // the number of times fuzzpos needs to be clamped. + if (fuzz) + { + cnt = MIN(FUZZTABLE-fuzz,count); + count -= cnt; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + if (fuzz == FUZZTABLE || count > 0) + { + while (count >= FUZZTABLE) + { + fuzz = 0; + cnt = FUZZTABLE; + count -= FUZZTABLE; + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--cnt); + } + fuzz = 0; + if (count > 0) + { + do + { + *dest = map[dest[fuzzoffset[fuzz++]]]; + dest += pitch; + } while (--count); + } + } + fuzzpos = fuzz; + } +} +#endif + +void R_DrawFuzzColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + + // Adjust borders. Low... 
+ if (dc_yl == 0) + dc_yl = 1; + + // .. and high. + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + count = dc_yh - dc_yl; + + // Zero length. + if (count < 0) + return; + + count++; + + dest = ylookup[dc_yl] + dc_x + dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -618,58 +839,7 @@ void R_DrawFuzzColumnP_C (void) } fuzzpos = fuzz; } - -#else - - // colormap #6 is used for shading (of 0-31, a bit brighter than average) - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - BYTE *map = &NormalLight.Maps[6*256]; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) - { - cnt = MIN(FUZZTABLE-fuzz,count); - count -= cnt; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - *dest = map[dest[fuzzoffset[fuzz++]]]; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -#endif -} -#endif +} // // R_DrawTranlucentColumn @@ -733,7 +903,44 @@ void R_DrawAddColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[source[frac>>FRACBITS]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + dest += pitch; + frac += fracstep; + } while 
(--count); + } +} + +void R_DrawAddColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { const BYTE *source = dc_source; int pitch = dc_pitch; @@ -760,28 +967,6 @@ void R_DrawAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[source[frac>>FRACBITS]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // @@ -803,9 +988,39 @@ void R_DrawTranslatedColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Local copies of global vars to improve compiler optimizations + BYTE *colormap = dc_colormap; + BYTE *translation = dc_translation; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + *dest = colormap[translation[source[frac>>FRACBITS]]]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; @@ -821,20 +1036,54 @@ void R_DrawTranslatedColumnP_C (void) do { -#ifndef PALETTEOUTPUT *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); -#else - *dest = colormap[translation[source[frac>>FRACBITS]]]; -#endif dest += pitch; - frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C (void) +void 
R_DrawTlatedAddColumnP_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + + do + { + DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; + DWORD bg = *dest; + + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg >> 15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawTlatedAddColumnP_RGBA_C() { int count; canvas_pixel_t *dest; @@ -845,16 +1094,13 @@ void R_DrawTlatedAddColumnP_C (void) if (count <= 0) return; -#ifndef PALETTEOUTPUT uint32_t light = calc_light_multiplier(dc_light); -#endif dest = dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -882,29 +1128,6 @@ void R_DrawTlatedAddColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - DWORD fg = colormap[translation[source[frac>>FRACBITS]]]; - DWORD bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg + bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg >> 15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Draw a column whose "color" values are actually translucency @@ -925,7 +1148,41 @@ void R_DrawShadedColumnP_C (void) fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; + int pitch = dc_pitch; + DWORD *fgstart = &Col2RGB8[0][dc_color]; + + do + { + DWORD val = 
colormap[source[frac>>FRACBITS]]; + DWORD fg = fgstart[val<<8]; + val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; + *dest = RGB32k.All[val & (val>>15)]; + + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawShadedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac, fracstep; + + count = dc_count; + + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -954,26 +1211,7 @@ void R_DrawShadedColumnP_C (void) frac += fracstep; } while (--count); } -#else - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - DWORD *fgstart = &Col2RGB8[0][dc_color]; - - do - { - DWORD val = colormap[source[frac>>FRACBITS]]; - DWORD fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif -} +} // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () @@ -992,7 +1230,6 @@ void R_DrawAddClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; @@ -1019,30 +1256,50 @@ void R_DrawAddClampColumnP_C () frac += fracstep; } while (--count); } -#else +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; + BYTE *colormap = dc_colormap; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; + uint32_t light = calc_light_multiplier(dc_light); do { - DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] 
+ bg2rgb[*dest]; - DWORD b = a; + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; frac += fracstep; } while (--count); } -#endif } // Add translated source to destination, clamping it to white @@ -1062,35 +1319,6 @@ void R_DrawAddClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1114,7 +1342,51 @@ void R_DrawAddClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + int count; + 
canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract destination from source, clamping it to black @@ -1134,7 +1406,45 @@ void R_DrawSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1161,29 +1471,6 @@ void R_DrawSubClampColumnP_C () frac += fracstep; } while (--count); } 
-#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (fg2rgb[colormap[source[frac>>FRACBITS]]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract destination from source, clamping it to black @@ -1203,35 +1490,6 @@ void R_DrawSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - - do - { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -#else { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1254,7 +1512,51 @@ void R_DrawSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#endif +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + BYTE *translation = dc_translation; + BYTE *colormap 
= dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + uint32_t light = calc_light_multiplier(dc_light); + + do + { + uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + frac += fracstep; + } while (--count); + } } // Subtract source from destination, clamping it to black @@ -1274,7 +1576,45 @@ void R_DrawRevSubClampColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *colormap = dc_colormap; const BYTE *source = dc_source; @@ -1301,29 +1641,6 @@ void R_DrawRevSubClampColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - 
{ - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[source[frac>>FRACBITS]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } // Subtract source from destination, clamping it to black @@ -1343,7 +1660,46 @@ void R_DrawRevSubClampTranslatedColumnP_C () fracstep = dc_iscale; frac = dc_texturefrac; -#ifndef PALETTEOUTPUT + { + BYTE *translation = dc_translation; + BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; + int pitch = dc_pitch; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[(a>>15) & a]; + dest += pitch; + frac += fracstep; + } while (--count); + } +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + int count; + canvas_pixel_t *dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + if (count <= 0) + return; + + dest = dc_dest; + + fracstep = dc_iscale; + frac = dc_texturefrac; + { BYTE *translation = dc_translation; BYTE *colormap = dc_colormap; @@ -1371,34 +1727,9 @@ void R_DrawRevSubClampTranslatedColumnP_C () frac += fracstep; } while (--count); } -#else - { - BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - - do - { - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[translation[source[frac>>FRACBITS]]]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - dest += pitch; - frac += fracstep; - } while (--count); - } -#endif } - // // R_DrawSpan // With DOOM style restrictions on view orientation, @@ -1549,15 +1880,84 @@ void 
R_DrawSpanP_C (void) xstep = ds_xstep; ystep = ds_ystep; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + do + { + // Current texture index in u,v. + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = colormap[source[spot]]; + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} #endif +void R_DrawSpanP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
-#ifndef PALETTEOUTPUT #ifndef NO_SSE __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1589,14 +1989,14 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); // Next step in u,v. @@ -1604,21 +2004,16 @@ void R_DrawSpanP_C (void) } if (count == 0) return; -#endif #endif do { // Current texture index in u,v. - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. xfrac += xstep; @@ -1638,11 +2033,7 @@ void R_DrawSpanP_C (void) // Lookup pixel from flat texture tile, // re-index using light/colormap. -#ifndef PALETTEOUTPUT *dest++ = shade_pal_index(colormap[source[spot]], light); -#else - *dest++ = colormap[source[spot]]; -#endif // Next step in u,v. 
xfrac += xstep; @@ -1651,6 +2042,8 @@ void R_DrawSpanP_C (void) } } +#ifndef X86_ASM + // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) { @@ -1664,10 +2057,6 @@ void R_DrawSpanMaskedP_C (void) int count; int spot; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(ds_light); -#endif - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1689,11 +2078,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1713,11 +2098,7 @@ void R_DrawSpanMaskedP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[texdata], light); -#else *dest = colormap[texdata]; -#endif } dest++; xfrac += xstep; @@ -1727,6 +2108,71 @@ void R_DrawSpanMaskedP_C (void) } #endif +void R_DrawSpanMaskedP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + *dest = shade_pal_index(colormap[texdata], light); + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + + void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -1756,7 +2202,68 @@ void R_DrawSpanTranslucentP_C (void) if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. -#ifndef PALETTEOUTPUT + do + { + spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + DWORD fg = colormap[source[spot]]; + DWORD bg = *dest; + fg = fg2rgb[fg]; + bg = bg2rgb[bg]; + fg = (fg+bg) | 0x1f07c1f; + *dest++ = RGB32k.All[fg & (fg>>15)]; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + 
dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. do { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); @@ -1779,24 +2286,9 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - do - { - spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } else { -#ifndef PALETTEOUTPUT BYTE yshift = 32 - ds_ybits; BYTE xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; @@ -1822,23 +2314,6 @@ void R_DrawSpanTranslucentP_C (void) xfrac += xstep; yfrac += ystep; } while (--count); -#else - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD fg = colormap[source[spot]]; - DWORD bg = *dest; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[fg & (fg>>15)]; - xfrac += xstep; - yfrac += ystep; - } while (--count); -#endif } } @@ -1879,29 +2354,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | 
(red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1921,29 +2379,12 @@ void R_DrawSpanMaskedTranslucentP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD fg = colormap[texdata]; DWORD bg = *dest; fg = fg2rgb[fg]; bg = bg2rgb[bg]; fg = (fg+bg) | 0x1f07c1f; *dest = RGB32k.All[fg & (fg>>15)]; -#endif } dest++; xfrac += xstep; @@ -1952,7 +2393,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void R_DrawSpanAddClampP_C (void) +void R_DrawSpanMaskedTranslucentP_RGBA_C() { dsfixed_t xfrac; dsfixed_t yfrac; @@ -1978,6 +2419,96 @@ void R_DrawSpanAddClampP_C (void) xstep = ds_xstep; ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_C (void) +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; 
+ ystep = ds_ystep; + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1985,22 +2516,6 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[source[spot]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -2010,7 +2525,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; -#endif xfrac += xstep; yfrac += ystep; @@ -2025,7 +2539,55 @@ void R_DrawSpanAddClampP_C (void) { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest++ = RGB32k.All[a & (a>>15)]; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the 
most common case by far, so special case it. + do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t fg = shade_pal_index(colormap[source[spot]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2040,17 +2602,34 @@ void R_DrawSpanAddClampP_C (void) uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; - DWORD b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest++ = RGB32k.All[a & (a>>15)]; -#endif + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; xfrac += xstep; yfrac += ystep; @@ -2095,22 +2674,6 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(colormap[texdata], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue 
+ bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; DWORD b = a; @@ -2120,7 +2683,6 @@ void R_DrawSpanMaskedAddClampP_C (void) b = b - (b >> 5); a |= b; *dest = RGB32k.All[a & (a>>15)]; -#endif } dest++; xfrac += xstep; @@ -2140,7 +2702,60 @@ void R_DrawSpanMaskedAddClampP_C (void) texdata = source[spot]; if (texdata != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + canvas_pixel_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(ds_light); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { uint32_t fg = shade_pal_index(colormap[texdata], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2155,17 +2770,39 @@ void R_DrawSpanMaskedAddClampP_C (void) uint32_t blue = (fg_blue + bg_blue + 1) / 2; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[texdata]] + bg2rgb[*dest]; - DWORD b = a; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } dest++; xfrac += xstep; @@ -2175,18 +2812,19 @@ void R_DrawSpanMaskedAddClampP_C (void) } // [RH] Just fill a span with a color -void R_FillSpan (void) +void R_FillSpan_C (void) +{ + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); +} + +void R_FillSpan_RGBA() { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int 
i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1) * sizeof(canvas_pixel_t)); -#endif } // Draw a voxel slab @@ -2383,17 +3021,33 @@ DWORD vlinec1 () int bits = vlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); + do + { + *dest = colormap[source[frac >> bits]]; + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif +DWORD vlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + do { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[source[frac>>bits]], light); -#else - *dest = colormap[source[frac >> bits]]; -#endif + *dest = shade_pal_index(colormap[source[frac >> bits]], light); frac += fracstep; dest += pitch; } while (--count); @@ -2401,6 +3055,7 @@ DWORD vlinec1 () return frac; } +#if !defined(X86_ASM) void vlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2408,23 +3063,37 @@ void vlinec4 () int bits = vlinebits; DWORD place; -#ifndef PALETTEOUTPUT + do + { + dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; + dest[1] = palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; + dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; + dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; + dest += dc_pitch; + } while (--count); +} +#endif + +void vlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = vlinebits; + uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = 
calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); #ifndef NO_SSE - __m128i mlight_hi = _mm_set_epi16(256, light0, light0, light0, 256, light1, light1, light1); - __m128i mlight_lo = _mm_set_epi16(256, light2, light2, light2, 256, light3, light3, light3); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif #endif do { -#ifndef PALETTEOUTPUT #ifndef NO_SSE DWORD place0 = local_vplce[0]; @@ -2442,14 +3111,14 @@ void vlinec4 () local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p0], palette[p1], palette[p2], palette[p3]); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); fg_hi = _mm_srli_epi16(fg_hi, 8); fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_hi, fg_lo); + fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); #else @@ -2457,17 +3126,10 @@ void vlinec4 () dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif -#else - dest[0] = palookupoffse[0][bufplce[0][(place=vplce[0])>>bits]]; vplce[0] = place+vince[0]; - dest[1] = 
palookupoffse[1][bufplce[1][(place=vplce[1])>>bits]]; vplce[1] = place+vince[1]; - dest[2] = palookupoffse[2][bufplce[2][(place=vplce[2])>>bits]]; vplce[2] = place+vince[2]; - dest[3] = palookupoffse[3][bufplce[3][(place=vplce[3])>>bits]]; vplce[3] = place+vince[3]; #endif dest += dc_pitch; } while (--count); -#ifndef PALETTEOUTPUT #ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -2479,9 +3141,7 @@ void vlinec4 () vince[2] = local_vince[2]; vince[3] = local_vince[3]; #endif -#endif } -#endif void setupmvline (int fracbits) { @@ -2506,20 +3166,40 @@ DWORD mvlinec1 () int bits = mvlinebits; int pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[pix], light); -#else *dest = colormap[pix]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} #endif + +DWORD mvlinec1_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(colormap[pix], light); } frac += fracstep; dest += pitch; @@ -2528,6 +3208,7 @@ DWORD mvlinec1 () return frac; } +#if !defined(X86_ASM) void mvlinec4 () { canvas_pixel_t *dest = dc_dest; @@ -2535,33 +3216,42 @@ void mvlinec4 () int bits = mvlinebits; DWORD place; -#ifndef PALETTEOUTPUT - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); -#endif - do { BYTE pix; - -#ifndef PALETTEOUTPUT - pix 
= bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; -#else pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; pix = bufplce[3][(place=vplce[3])>>bits]; if(pix) dest[3] = palookupoffse[3][pix]; vplce[3] = place+vince[3]; -#endif dest += dc_pitch; } while (--count); } #endif +void mvlinec4_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = 
shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -2572,21 +3262,13 @@ static void R_DrawFogBoundarySection (int y, int y2, int x1) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - for (; y < y2; ++y) { int x2 = spanend[y]; int x = x1; do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); dest += dc_pitch; } @@ -2598,21 +3280,13 @@ static void R_DrawFogBoundaryLine (int y, int x) BYTE *colormap = dc_colormap; canvas_pixel_t *dest = ylookup[y] + dc_destorg; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - do { -#ifndef PALETTEOUTPUT - dest[x] = shade_pal_index(colormap[dest[x]], light); -#else dest[x] = colormap[dest[x]]; -#endif } while (++x <= x2); } -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) { // This is essentially the same as R_MapVisPlane but with an extra step // to create new horizontal spans whenever the light changes enough that @@ -2703,6 +3377,133 @@ void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip) } } +static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + + for (; y < y2; ++y) + { + int x2 = spanend[y]; + int x = x1; + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); + dest += dc_pitch; + } +} + +static void R_DrawFogBoundaryLine_RGBA(int y, int x) +{ + int x2 = spanend[y]; + BYTE *colormap = dc_colormap; + canvas_pixel_t *dest = ylookup[y] + dc_destorg; 
+ + uint32_t light = calc_light_multiplier(dc_light); + + do + { + dest[x] = shade_pal_index(colormap[dest[x]], light); + } while (++x <= x2); +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); + dc_light = 0; + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); + dc_light = 0; + } + else + { + if (dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + R_DrawFogBoundaryLine_RGBA(t2++, xr); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + R_DrawFogBoundaryLine_RGBA(--b2, xr); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} + + int tmvlinebits; void setuptmvline (int bits) @@ -2710,7 +3511,40 @@ void setuptmvline (int bits) tmvlinebits = bits; } -fixed_t tmvline1_add () +fixed_t tmvline1_add_C () +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac>>bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[colormap[pix]]; + DWORD bg = bg2rgb[*dest]; + fg = (fg+bg) | 0x1f07c1f; + *dest = RGB32k.All[fg & (fg>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2728,7 +3562,6 @@ fixed_t tmvline1_add () do { -#ifndef PALETTEOUTPUT BYTE pix = source[frac >> bits]; if (pix != 0) { @@ -2747,16 +3580,6 @@ fixed_t tmvline1_add () *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } -#else - 
BYTE pix = source[frac>>bits]; - if (pix != 0) - { - DWORD fg = fg2rgb[colormap[pix]]; - DWORD bg = bg2rgb[*dest]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - } -#endif frac += fracstep; dest += pitch; } while (--count); @@ -2764,7 +3587,40 @@ fixed_t tmvline1_add () return frac; } -void tmvline4_add () +void tmvline4_add_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD fg = fg2rgb[palookupoffse[i][pix]]; + DWORD bg = bg2rgb[dest[i]]; + fg = (fg+bg) | 0x1f07c1f; + dest[i] = RGB32k.All[fg & (fg>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_add_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2786,7 +3642,6 @@ void tmvline4_add () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2801,12 +3656,6 @@ void tmvline4_add () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD fg = fg2rgb[palookupoffse[i][pix]]; - DWORD bg = bg2rgb[dest[i]]; - fg = (fg+bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2814,7 +3663,7 @@ void tmvline4_add () } while (--count); } -fixed_t tmvline1_addclamp () +fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2835,7 +3684,44 @@ fixed_t tmvline1_addclamp () BYTE pix = 
source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2850,17 +3736,6 @@ fixed_t tmvline1_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[colormap[pix]] + bg2rgb[*dest]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2869,7 +3744,7 @@ fixed_t tmvline1_addclamp () return frac; } -void tmvline4_addclamp () +void tmvline4_addclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2878,6 +3753,35 @@ void tmvline4_addclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; + DWORD b = a; + + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void 
tmvline4_addclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -2891,7 +3795,6 @@ void tmvline4_addclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2906,17 +3809,6 @@ void tmvline4_addclamp () uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = fg2rgb[palookupoffse[i][pix]] + bg2rgb[dest[i]]; - DWORD b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -2924,7 +3816,7 @@ void tmvline4_addclamp () } while (--count); } -fixed_t tmvline1_subclamp () +fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -2938,14 +3830,45 @@ fixed_t tmvline1_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { 
uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -2960,16 +3883,6 @@ fixed_t tmvline1_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[colormap[pix]] | 0x40100400) - bg2rgb[*dest]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -2978,7 +3891,7 @@ fixed_t tmvline1_subclamp () return frac; } -void tmvline4_subclamp () +void tmvline4_subclamp_C () { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -2987,6 +3900,34 @@ void tmvline4_subclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_subclamp_RGBA() +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = tmvlinebits; + uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3000,7 +3941,6 @@ void tmvline4_subclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3015,16 +3955,6 @@ void tmvline4_subclamp () uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (fg2rgb[palookupoffse[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - DWORD b = a; - - 
b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3032,7 +3962,7 @@ void tmvline4_subclamp () } while (--count); } -fixed_t tmvline1_revsubclamp () +fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3046,14 +3976,45 @@ fixed_t tmvline1_revsubclamp () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT + DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + *dest = RGB32k.All[a & (a>>15)]; + } + frac += fracstep; + dest += pitch; + } while (--count); + + return frac; +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + BYTE *colormap = dc_colormap; + int count = dc_count; + const BYTE *source = dc_source; + canvas_pixel_t *dest = dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { uint32_t fg = shade_pal_index(colormap[pix], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3068,16 +4029,6 @@ fixed_t tmvline1_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[a & (a>>15)]; -#endif } frac += fracstep; dest += pitch; @@ -3086,7 +4037,38 @@ fixed_t tmvline1_revsubclamp () return frac; } -void tmvline4_revsubclamp () +void tmvline4_revsubclamp_C () +{ + canvas_pixel_t *dest = dc_dest; + int count = dc_count; + int bits = 
tmvlinebits; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; + DWORD b = a; + + b &= 0x40100400; + b = b - (b >> 5); + a &= b; + a |= 0x01f07c1f; + dest[i] = RGB32k.All[a & (a>>15)]; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); +} + +void tmvline4_revsubclamp_RGBA() { canvas_pixel_t *dest = dc_dest; int count = dc_count; @@ -3108,7 +4090,6 @@ void tmvline4_revsubclamp () BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { -#ifndef PALETTEOUTPUT uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -3123,16 +4104,6 @@ void tmvline4_revsubclamp () uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - DWORD a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[palookupoffse[i][pix]]; - DWORD b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a>>15)]; -#endif } vplce[i] += vince[i]; } @@ -3164,6 +4135,85 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { +#ifndef PALETTEOUTPUT + + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = 
R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + 
rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; + +#else + #ifdef X86_ASM R_DrawColumn = R_DrawColumnP_ASM; R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; @@ -3194,6 +4244,72 @@ void R_InitColumnDrawers () R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; + + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; + + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; 
+ tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; + +#ifdef X86_ASM + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; +#else + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; +#endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + +#endif } // [RH] Choose column drawers in a single place @@ -3211,7 +4327,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { if (flags & STYLEF_ColorIsFixed) { - colfunc = R_FillColumnP; + colfunc = R_FillColumn; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; } @@ -3261,13 +4377,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = 
R_DrawAddColumnP_C; + colfunc = R_DrawAddColumn; hcolfunc_post1 = rt_add1col; hcolfunc_post4 = rt_add4cols; } else { - colfunc = R_DrawTlatedAddColumnP_C; + colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; } @@ -3282,13 +4398,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawAddClampColumnP_C; + colfunc = R_DrawAddClampColumn; hcolfunc_post1 = rt_addclamp1col; hcolfunc_post4 = rt_addclamp4cols; } else { - colfunc = R_DrawAddClampTranslatedColumnP_C; + colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; } @@ -3304,13 +4420,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawSubClampColumnP_C; + colfunc = R_DrawSubClampColumn; hcolfunc_post1 = rt_subclamp1col; hcolfunc_post4 = rt_subclamp4cols; } else { - colfunc = R_DrawSubClampTranslatedColumnP_C; + colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; } @@ -3329,13 +4445,13 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) } else if (dc_translation == NULL) { - colfunc = R_DrawRevSubClampColumnP_C; + colfunc = R_DrawRevSubClampColumn; hcolfunc_post1 = rt_revsubclamp1col; hcolfunc_post4 = rt_revsubclamp4cols; } else { - colfunc = R_DrawRevSubClampTranslatedColumnP_C; + colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; } @@ -3440,7 +4556,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; dc_colormap = identitymap; dc_light = 0; } @@ -3459,25 +4575,25 @@ void R_FinishSetPatchStyle () bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) { - if (colfunc == R_DrawAddColumnP_C) + if (colfunc == R_DrawAddColumn) { *tmvline1 = tmvline1_add; *tmvline4 = tmvline4_add; return true; } - if (colfunc == R_DrawAddClampColumnP_C) + if (colfunc == R_DrawAddClampColumn) { *tmvline1 = tmvline1_addclamp; *tmvline4 = tmvline4_addclamp; return true; } - if (colfunc == R_DrawSubClampColumnP_C) + if (colfunc == R_DrawSubClampColumn) { *tmvline1 = tmvline1_subclamp; *tmvline4 = tmvline4_subclamp; return true; } - if (colfunc == R_DrawRevSubClampColumnP_C) + if (colfunc == R_DrawRevSubClampColumn) { *tmvline1 = tmvline1_revsubclamp; *tmvline4 = tmvline4_revsubclamp; diff --git a/src/r_draw.h b/src/r_draw.h index 6f7a91154..17698c360 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -127,33 +127,33 @@ extern "C" void rt_copy1col_c (int hx, int sx, int yl, int yh); void rt_copy4cols_c (int sx, int yl, int yh); -void rt_shaded1col (int hx, int sx, int yl, int yh); +void rt_shaded1col_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_c (int sx, int yl, int yh); void rt_shaded4cols_asm (int sx, int yl, int yh); void rt_map1col_c (int hx, int sx, int yl, int yh); -void rt_add1col (int hx, int sx, int yl, int yh); -void rt_addclamp1col (int hx, int sx, int yl, int yh); -void rt_subclamp1col (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col (int hx, int sx, int yl, int yh); +void rt_add1col_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh); -void rt_tlate1col (int hx, int sx, int yl, int yh); -void rt_tlateadd1col (int hx, int sx, int yl, 
int yh); -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh); +void rt_tlate1col_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh); void rt_map4cols_c (int sx, int yl, int yh); void rt_add4cols_c (int sx, int yl, int yh); void rt_addclamp4cols_c (int sx, int yl, int yh); -void rt_subclamp4cols (int sx, int yl, int yh); -void rt_revsubclamp4cols (int sx, int yl, int yh); +void rt_subclamp4cols_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_c (int sx, int yl, int yh); -void rt_tlate4cols (int sx, int yl, int yh); -void rt_tlateadd4cols (int sx, int yl, int yh); -void rt_tlateaddclamp4cols (int sx, int yl, int yh); -void rt_tlatesubclamp4cols (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh); +void rt_tlate4cols_c (int sx, int yl, int yh); +void rt_tlateadd4cols_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh); void rt_copy1col_asm (int hx, int sx, int yl, int yh); void rt_map1col_asm (int hx, int sx, int yl, int yh); @@ -163,32 +163,83 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); + +/// + +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA_c (int sx, int yl, int yh); + +void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); + +void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); 
+void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); + +void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); + +void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); + } -extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); +extern void (*rt_copy4cols)(int sx, int yl, int yh); -#ifdef X86_ASM -#define rt_copy1col rt_copy1col_asm -#define rt_copy4cols rt_copy4cols_asm -#define rt_map1col rt_map1col_asm -#define rt_shaded4cols rt_shaded4cols_asm -#define rt_add4cols rt_add4cols_asm -#define rt_addclamp4cols rt_addclamp4cols_asm -#else -#define rt_copy1col rt_copy1col_c -#define rt_copy4cols rt_copy4cols_c -#define rt_map1col rt_map1col_c -#define rt_shaded4cols rt_shaded4cols_c -#define rt_add4cols rt_add4cols_c -#define rt_addclamp4cols rt_addclamp4cols_c -#endif +extern void (*rt_shaded1col)(int hx, int sx, int yl, int yh); +extern void (*rt_shaded4cols)(int sx, int yl, int yh); + +extern void (*rt_map1col)(int hx, int sx, int yl, int 
yh); +extern void (*rt_add1col)(int hx, int sx, int yl, int yh); +extern void (*rt_addclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_subclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_revsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_tlate1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateadd1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlateaddclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlatesubclamp1col)(int hx, int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp1col)(int hx, int sx, int yl, int yh); + +extern void (*rt_map4cols)(int sx, int yl, int yh); +extern void (*rt_add4cols)(int sx, int yl, int yh); +extern void (*rt_addclamp4cols)(int sx, int yl, int yh); +extern void (*rt_subclamp4cols)(int sx, int yl, int yh); +extern void (*rt_revsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_tlate4cols)(int sx, int yl, int yh); +extern void (*rt_tlateadd4cols)(int sx, int yl, int yh); +extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); +extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); + +extern void (*rt_initcols)(canvas_pixel_t *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols (canvas_pixel_t *buffer=NULL); +void rt_initcols_pal (canvas_pixel_t *buffer); +void rt_initcols_rgba (canvas_pixel_t *buffer); -void R_DrawFogBoundary (int x1, int x2, short *uclip, short *dclip); + +extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); + +void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip); #ifdef X86_ASM @@ -212,6 +263,14 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +void R_DrawColumnHorizP_RGBA_C (void); +void R_DrawColumnP_RGBA_C (void); +void R_DrawFuzzColumnP_RGBA_C (void); +void R_DrawTranslatedColumnP_RGBA_C (void); +void R_DrawShadedColumnP_RGBA_C (void); +void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanMaskedP_RGBA_C (void); + #endif void R_DrawSpanTranslucentP_C (void); @@ -220,9 +279,30 @@ void R_DrawSpanMaskedTranslucentP_C (void); void R_DrawTlatedLucentColumnP_C (void); #define R_DrawTlatedLucentColumn R_DrawTlatedLucentColumnP_C -void R_FillColumnP (void); -void R_FillColumnHorizP (void); -void R_FillSpan (void); +extern void(*R_FillColumn)(void); +extern void(*R_FillAddColumn)(void); +extern void(*R_FillAddClampColumn)(void); +extern void(*R_FillSubClampColumn)(void); +extern void(*R_FillRevSubClampColumn)(void); +extern void(*R_DrawAddColumn)(void); +extern void(*R_DrawTlatedAddColumn)(void); +extern void(*R_DrawAddClampColumn)(void); +extern void(*R_DrawAddClampTranslatedColumn)(void); +extern void(*R_DrawSubClampColumn)(void); +extern void(*R_DrawSubClampTranslatedColumn)(void); +extern void(*R_DrawRevSubClampColumn)(void); +extern void(*R_DrawRevSubClampTranslatedColumn)(void); + +extern void(*R_FillSpan)(void); +extern void(*R_FillColumnHoriz)(void); + +void R_FillColumnP_C (void); + +void R_FillColumnHorizP_C (void); +void R_FillSpan_C (void); + +void R_FillColumnHorizP_RGBA_C(void); +void R_FillSpan_RGBA_C(void); #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA @@ -282,6 +362,15 @@ inline ESPSResult 
R_SetPatchStyle(FRenderStyle style, float alpha, int translati // style was STYLE_Shade void R_FinishSetPatchStyle (); +extern fixed_t(*tmvline1_add)(); +extern void(*tmvline4_add)(); +extern fixed_t(*tmvline1_addclamp)(); +extern void(*tmvline4_addclamp)(); +extern fixed_t(*tmvline1_subclamp)(); +extern void(*tmvline4_subclamp)(); +extern fixed_t(*tmvline1_revsubclamp)(); +extern void(*tmvline4_revsubclamp)(); + // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index f5fc027b5..9520f59b3 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -114,13 +114,6 @@ void rt_copy1col_c (int hx, int sx, int yl, int yh) // Copies all four spans to the screen starting at sx. void rt_copy4cols_c (int sx, int yl, int yh) { -#ifndef PALETTEOUTPUT - // To do: we could do this with SSE using __m128i - rt_copy1col_c(0, sx, yl, yh); - rt_copy1col_c(1, sx + 1, yl, yh); - rt_copy1col_c(2, sx + 2, yl, yh); - rt_copy1col_c(3, sx + 3, yl, yh); -#else int *source; int *dest; int count; @@ -149,7 +142,6 @@ void rt_copy4cols_c (int sx, int yl, int yh) source += 8/sizeof(int); dest += pitch*2; } while (--count); -#endif } // Maps one span at hx to the screen at sx. 
@@ -166,21 +158,13 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - *dest = shade_pal_index(colormap[*source], light); -#else *dest = colormap[*source]; -#endif source += 4; dest += pitch; } @@ -188,13 +172,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); -#else dest[0] = colormap[source[0]]; dest[pitch] = colormap[source[4]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -214,27 +193,16 @@ void rt_map4cols_c (int sx, int yl, int yh) return; count++; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); -#endif - colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; if (count & 1) { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; dest[3] = colormap[source[3]]; -#endif source += 4; dest += pitch; } @@ -242,16 +210,6 @@ void rt_map4cols_c (int sx, int yl, int yh) return; do { -#ifndef PALETTEOUTPUT - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = 
shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); -#else dest[0] = colormap[source[0]]; dest[1] = colormap[source[1]]; dest[2] = colormap[source[2]]; @@ -260,7 +218,6 @@ void rt_map4cols_c (int sx, int yl, int yh) dest[pitch+1] = colormap[source[5]]; dest[pitch+2] = colormap[source[6]]; dest[pitch+3] = colormap[source[7]]; -#endif source += 8; dest += pitch*2; } while (--count); @@ -356,21 +313,21 @@ void rt_Translate4cols(const BYTE *translation, int yl, int yh) } // Translates one span at hx to the screen at sx. -void rt_tlate1col (int hx, int sx, int yl, int yh) +void rt_tlate1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols (int sx, int yl, int yh) +void rt_tlate4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. 
-void rt_add1col (int hx, int sx, int yl, int yh) +void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -388,29 +345,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -424,7 +358,6 @@ void rt_add1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx without clamping. 
@@ -446,32 +379,6 @@ void rt_add4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -508,25 +415,24 @@ void rt_add4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols (int sx, int yl, int yh) +void rt_tlateadd4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. 
-void rt_shaded1col (int hx, int sx, int yl, int yh) +void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -544,29 +450,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -578,7 +461,6 @@ void rt_shaded1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Shades all four spans to the screen starting at sx. 
@@ -600,32 +482,6 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source = &dc_temp[yl*4]; pitch = dc_pitch; -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fgstart; fgstart = &Col2RGB8[0][dc_color]; @@ -651,11 +507,10 @@ void rt_shaded4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds one span at hx to the screen at sx with clamping. 
-void rt_addclamp1col (int hx, int sx, int yl, int yh) +void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -673,28 +528,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -711,7 +544,6 @@ void rt_addclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Adds all four spans to the screen starting at sx with clamping. 
@@ -733,31 +565,6 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; @@ -802,25 +609,24 @@ void rt_addclamp4cols_c (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_addclamp1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col (int hx, int sx, int yl, int yh) +void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -838,28 +644,6 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -874,11 +658,10 @@ void rt_subclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans to the screen starting at sx with clamping. 
-void rt_subclamp4cols (int sx, int yl, int yh) +void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -896,32 +679,6 @@ void rt_subclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { @@ -961,25 +718,24 @@ void rt_subclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_subclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_subclamp4cols(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. 
-void rt_revsubclamp1col (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -999,28 +755,6 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - uint32_t fg = shade_pal_index(colormap[*source], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; DWORD b = a; @@ -1033,11 +767,10 @@ void rt_revsubclamp1col (int hx, int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols (int sx, int yl, int yh) +void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; canvas_pixel_t *source; @@ -1057,32 +790,6 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) pitch = dc_pitch; colormap = dc_colormap; -#ifndef PALETTEOUTPUT - uint32_t light = calc_light_multiplier(dc_light); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(colormap[source[i]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); -#else do { DWORD a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; DWORD b = a; @@ -1120,18 +827,17 @@ void rt_revsubclamp4cols (int sx, int yl, int yh) source += 4; dest += pitch; } while (--count); -#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_c (int hx, int sx, int yl, int yh) { rt_Translate1col(dc_translation, hx, yl, yh); rt_revsubclamp1col(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_c (int sx, int yl, int yh) { rt_Translate4cols(dc_translation, yl, yh); rt_revsubclamp4cols(sx, yl, yh); @@ -1301,7 +1007,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols (canvas_pixel_t *buff) +void rt_initcols_pal (canvas_pixel_t *buff) { int y; @@ -1372,7 +1078,7 @@ void R_DrawColumnHorizP_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP (void) +void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp new file mode 100644 index 000000000..e8111be8f --- /dev/null +++ b/src/r_drawt_rgba.cpp @@ -0,0 +1,883 @@ +/* +** r_drawt_rgba.cpp +** Faster column drawers for modern processors, true color edition +** +**--------------------------------------------------------------------------- +** Copyright 1998-2006 Randy Heit +** All rights reserved. +** +** Redistribution and use in source and binary forms, with or without +** modification, are permitted provided that the following conditions +** are met: +** +** 1. Redistributions of source code must retain the above copyright +** notice, this list of conditions and the following disclaimer. +** 2. Redistributions in binary form must reproduce the above copyright +** notice, this list of conditions and the following disclaimer in the +** documentation and/or other materials provided with the distribution. +** 3. The name of the author may not be used to endorse or promote products +** derived from this software without specific prior written permission. +** +** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR +** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, +** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**--------------------------------------------------------------------------- +** +** True color versions of the similar functions in r_drawt.cpp +** Please see r_drawt.cpp for a description of the globals used. +*/ + +#include "templates.h" +#include "doomtype.h" +#include "doomdef.h" +#include "r_defs.h" +#include "r_draw.h" +#include "r_main.h" +#include "r_things.h" +#include "v_video.h" + +canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +canvas_pixel_t *dc_temp_rgba; + +// Defined in r_draw_t.cpp: +extern unsigned int dc_tspans[4][MAXHEIGHT]; +extern unsigned int *dc_ctspan[4]; +extern unsigned int *horizspan[4]; + +// Copies one span at hx to the screen at sx. +void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch*2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch*2] = source[8]; + dest[pitch*3] = source[12]; + source += 16; + dest += pitch*4; + } while (--count); +} + +// Copies all four spans to the screen starting at sx. 
+void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +{ + // To do: we could do this with SSE using __m128i + rt_copy1col_RGBA_c(0, sx, yl, yh); + rt_copy1col_RGBA_c(1, sx + 1, yl, yh); + rt_copy1col_RGBA_c(2, sx + 2, yl, yh); + rt_copy1col_RGBA_c(3, sx + 3, yl, yh); +} + +// Maps one span at hx to the screen at sx. +void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(colormap[*source], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +// Maps all four spans to the screen starting at sx. 
+void rt_map4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(colormap[source[0]], light); + dest[1] = shade_pal_index(colormap[source[1]], light); + dest[2] = shade_pal_index(colormap[source[2]], light); + dest[3] = shade_pal_index(colormap[source[3]], light); + dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); + dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); + dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + source += 8; + dest += pitch*2; + } while (--count); +} + +void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. 
GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } +} + +void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +{ + int count = yh - yl + 1; + canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. + for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } +} + +// Translates one span at hx to the screen at sx. 
+void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_map1col(hx, sx, yl, yh); +} + +// Translates all four spans to the screen starting at sx. +void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_map4cols(sx, yl, yh); +} + +// Adds one span at hx to the screen at sx without clamping. +void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); +} + +// Adds all four spans to the screen starting at sx without clamping. 
+void rt_add4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and adds one span at hx to the screen at sx without clamping. +void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_add1col(hx, sx, yl, yh); +} + +// Translates and adds all four spans to the screen starting at sx without clamping. +void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_add4cols(sx, yl, yh); +} + +// Shades one span at hx to the screen at sx. 
+void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Shades all four spans to the screen starting at sx. 
+void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +} + +// Adds one span at hx to the screen at sx with clamping. 
+void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Adds all four spans to the screen starting at sx with clamping. 
+void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(fg_red + bg_red, 0, 255); + uint32_t green = clamp(fg_green + bg_green, 0, 255); + uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and adds one span at hx to the screen at sx with clamping. +void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and adds all four spans to the screen starting at sx with clamping. +void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_addclamp4cols(sx, yl, yh); +} + +// Subtracts one span at hx to the screen at sx with clamping. 
+void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Subtracts all four spans to the screen starting at sx with clamping. 
+void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx to the screen at sx with clamping. +void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans to the screen starting at sx with clamping. +void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_subclamp4cols_RGBA_c(sx, yl, yh); +} + +// Subtracts one span at hx from the screen at sx with clamping. 
+void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4 + hx]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); +} + +// Subtracts all four spans from the screen starting at sx with clamping. 
+void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + BYTE *colormap; + canvas_pixel_t *source; + canvas_pixel_t *dest; + int count; + int pitch; + + count = yh-yl; + if (count < 0) + return; + count++; + + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; + dest = ylookup[yl] + sx + dc_destorg; + source = &dc_temp_rgba[yl*4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; + uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; + uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); +} + +// Translates and subtracts one span at hx from the screen at sx with clamping. +void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +{ + rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); +} + +// Translates and subtracts all four spans from the screen starting at sx with clamping. +void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +{ + rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA_c(sx, yl, yh); +} + +// Before each pass through a rendering loop that uses these routines, +// call this function to set up the span pointers. +void rt_initcols_rgba (canvas_pixel_t *buff) +{ + int y; + + dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + for (y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; +} + +// Stretches a column into a temporary buffer which is later +// drawn to the screen along with up to three other columns. +void R_DrawColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + canvas_pixel_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span; + + span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = &dc_temp_rgba[x + 4*dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac>>FRACBITS]; frac += fracstep; + dest[4] = source[frac>>FRACBITS]; frac += fracstep; + dest[8] = source[frac>>FRACBITS]; frac += fracstep; + dest[12]= source[frac>>FRACBITS]; frac += fracstep; + dest[16]= source[frac>>FRACBITS]; frac += fracstep; + dest[20]= source[frac>>FRACBITS]; frac += fracstep; + dest[24]= source[frac>>FRACBITS]; frac += fracstep; + dest[28]= source[frac>>FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } +} + +// [RH] Just fills a column with a given color +void R_FillColumnHorizP_RGBA_C (void) +{ + int count = dc_count; + BYTE color = dc_color; + canvas_pixel_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + dest = 
&dc_temp_rgba[x + 4*dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); +} diff --git a/src/r_main.cpp b/src/r_main.cpp index 04e798981..b7723d07d 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -847,10 +847,10 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHorizP; + hcolfunc_pre = R_FillColumnHoriz; hcolfunc_post1 = rt_copy1col; hcolfunc_post4 = rt_copy4cols; - colfunc = R_FillColumnP; + colfunc = R_FillColumn; spanfunc = R_FillSpan; } else diff --git a/src/r_plane.cpp b/src/r_plane.cpp index b385302e5..c8258a1ba 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -491,18 +491,19 @@ void R_MapTiltedPlane (int y, int x1) // //========================================================================== -void R_MapColoredPlane (int y, int x1) +void R_MapColoredPlane_C (int y, int x1) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); +} + +void R_MapColoredPlane_RGBA(int y, int x1) { -#ifndef PALETTEOUTPUT canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; -#else - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1) * sizeof(canvas_pixel_t)); -#endif } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index d4db3dc09..ac63501e3 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,6 +93,10 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int 
x1)); +extern void(*R_MapColoredPlane)(int y, int x1); +void R_MapColoredPlane_C(int y, int x1); +void R_MapColoredPlane_RGBA(int y, int x1); + visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 1cdb78555..fb27a99de 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -463,7 +463,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) while (dc_x < stop) { - rt_initcols(); + rt_initcols(nullptr); BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; @@ -3319,7 +3319,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, dc_light = 0; #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { R_WallSpriteColumn (R_DrawMaskedColumnHoriz); diff --git a/src/r_things.cpp b/src/r_things.cpp index 0e55b45f9..a6f6aea28 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -470,7 +470,7 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); @@ -619,7 +619,7 @@ void R_DrawWallSprite(vissprite_t *spr) dc_light = FLOAT2FIXED(MAXLIGHTVIS); #endif } - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { if (!R_ClipSpriteColumnWithPortals(spr)) @@ -681,7 +681,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumnP) + if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } @@ -2617,7 +2617,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle (vissprite_t *vis) +void R_DrawParticle_C (vissprite_t *vis) { int spacing; canvas_pixel_t *dest; @@ -2629,44 +2629,6 @@ void R_DrawParticle (vissprite_t *vis) 
R_DrawMaskedSegsBehindParticle (vis); -#ifndef PALETTEOUTPUT - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1 + countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } - } -#else DWORD *bg2rgb; DWORD fg; @@ -2719,7 +2681,56 @@ void R_DrawParticle (vissprite_t *vis) dest += spacing; } } -#endif +} + +void R_DrawParticle_RGBA(vissprite_t *vis) +{ + int spacing; + canvas_pixel_t *dest; + BYTE color = vis->Style.colormap[vis->startfrac]; + int yl = vis->y1; + int ycount = vis->y2 - yl + 1; + int x1 = vis->x1; + int countbase = vis->x2 - x1; + + R_DrawMaskedSegsBehindParticle(vis); + + uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + // vis->renderflags holds translucency level (0-255) + fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; + uint32_t alpha = fglevel * 256 / FRACUNIT; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + spacing = RenderTarget->GetPitch(); + + 
for (int x = x1; x < (x1 + countbase); x++) + { + dc_x = x; + if (R_ClipSpriteColumnWithPortals(vis)) + continue; + dest = ylookup[yl] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * alpha) / 256; + uint32_t green = (fg_green + bg_green * alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } } extern double BaseYaspectMul;; diff --git a/src/r_things.h b/src/r_things.h index 1cf9b0200..057b7cfe2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -97,7 +97,10 @@ struct vissprite_t struct particle_t; -void R_DrawParticle (vissprite_t *); +extern void(*R_DrawParticle)(vissprite_t *); +void R_DrawParticle_C (vissprite_t *); +void R_DrawParticle_RGBA (vissprite_t *); + void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); extern int MaxVisSprites; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fd14b5e0a..8853fc947 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -300,7 +300,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < stop4) { - rt_initcols(); + rt_initcols(nullptr); for (int zz = 4; zz; --zz) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); From 7080180d478ae7158e4f2b8d1821089105764c8f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 30 May 2016 13:32:24 +0200 Subject: [PATCH 003/100] Added menu option for toggling true color output on and off --- src/r_draw.cpp | 327 ++++++++++++++++++------------------- src/r_main.cpp | 1 + src/win32/fb_d3d9.cpp | 79 +++++---- src/win32/win32iface.h | 1 + wadsrc/static/language.enu | 1 + wadsrc/static/menudef.txt | 1 + 6 files changed, 216 insertions(+), 194 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d2b694f05..83a4472f3 100644 --- a/src/r_draw.cpp +++ 
b/src/r_draw.cpp @@ -178,7 +178,7 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) - +EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4135,181 +4135,180 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { -#ifndef PALETTEOUTPUT - - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; - - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; - - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = R_DrawParticle_RGBA; - - 
tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; - rt_initcols = rt_initcols_rgba; - - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; - domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; - -#else - -#ifdef X86_ASM - R_DrawColumn = R_DrawColumnP_ASM; - R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; - R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; - R_DrawSpan = R_DrawSpanP_ASM; - R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; - if (CPU.Family <= 5) + if (r_swtruecolor) 
{ - rt_map4cols = rt_map4cols_asm2; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; + R_DrawColumn = R_DrawColumnP_RGBA_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; + R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; + rt_map4cols = rt_map4cols_RGBA_c; + + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_FillColumn = R_FillColumnP_RGBA; + R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddClampColumn = R_FillAddClampColumn_RGBA; + R_FillSubClampColumn = R_FillSubClampColumn_RGBA; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; + R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_FillSpan = R_FillSpan_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + + R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapColoredPlane = R_MapColoredPlane_RGBA; + R_DrawParticle = R_DrawParticle_RGBA; + + tmvline1_add = tmvline1_add_RGBA; + tmvline4_add = tmvline4_add_RGBA; + tmvline1_addclamp = tmvline1_addclamp_RGBA; + tmvline4_addclamp = tmvline4_addclamp_RGBA; + tmvline1_subclamp = tmvline1_subclamp_RGBA; + tmvline4_subclamp = tmvline4_subclamp_RGBA; + tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; + tmvline4_revsubclamp = 
tmvline4_revsubclamp_RGBA; + + rt_copy1col = rt_copy1col_RGBA_c; + rt_copy4cols = rt_copy4cols_RGBA_c; + rt_map1col = rt_map1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded1col = rt_shaded1col_RGBA_c; + rt_add1col = rt_add1col_RGBA_c; + rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_subclamp1col = rt_subclamp1col_RGBA_c; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; + rt_tlate1col = rt_tlate1col_RGBA_c; + rt_tlateadd1col = rt_tlateadd1col_RGBA_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; + rt_subclamp4cols = rt_subclamp4cols_RGBA_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; + rt_tlate4cols = rt_tlate4cols_RGBA_c; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_initcols = rt_initcols_rgba; + + dovline1 = vlinec1_RGBA; + doprevline1 = vlinec1_RGBA; + dovline4 = vlinec4_RGBA; + domvline1 = mvlinec1_RGBA; + domvline4 = mvlinec4_RGBA; } else { - rt_map4cols = rt_map4cols_asm1; - } +#ifdef X86_ASM + R_DrawColumn = R_DrawColumnP_ASM; + R_DrawColumnHoriz = R_DrawColumnHorizP_ASM; + R_DrawFuzzColumn = R_DrawFuzzColumnP_ASM; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_ASM; + R_DrawSpanMasked = R_DrawSpanMaskedP_ASM; + if (CPU.Family <= 5) + { + rt_map4cols = rt_map4cols_asm2; + } + else + { + rt_map4cols = rt_map4cols_asm1; + } #else - R_DrawColumnHoriz = R_DrawColumnHorizP_C; - R_DrawColumn = R_DrawColumnP_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; - R_DrawShadedColumn = R_DrawShadedColumnP_C; 
- R_DrawSpan = R_DrawSpanP_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_C; - rt_map4cols = rt_map4cols_c; + R_DrawColumnHoriz = R_DrawColumnHorizP_C; + R_DrawColumn = R_DrawColumnP_C; + R_DrawFuzzColumn = R_DrawFuzzColumnP_C; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_C; + R_DrawShadedColumn = R_DrawShadedColumnP_C; + R_DrawSpan = R_DrawSpanP_C; + R_DrawSpanMasked = R_DrawSpanMaskedP_C; + rt_map4cols = rt_map4cols_c; #endif - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; - R_FillColumn = R_FillColumnP_C; - R_FillAddColumn = R_FillAddColumn_C; - R_FillAddClampColumn = R_FillAddClampColumn_C; - R_FillSubClampColumn = R_FillSubClampColumn_C; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; - R_DrawAddColumn = R_DrawAddColumnP_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; - R_DrawAddClampColumn = R_DrawAddClampColumnP_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; - R_FillSpan = R_FillSpan_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_FillColumnHoriz = R_FillColumnHorizP_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_C; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_C; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_C; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_C; + R_FillColumn = R_FillColumnP_C; + R_FillAddColumn = R_FillAddColumn_C; + R_FillAddClampColumn = R_FillAddClampColumn_C; + R_FillSubClampColumn = R_FillSubClampColumn_C; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_C; + R_DrawAddColumn = R_DrawAddColumnP_C; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_C; + 
R_DrawAddClampColumn = R_DrawAddClampColumnP_C; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_C; + R_DrawSubClampColumn = R_DrawSubClampColumnP_C; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_C; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_C; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_C; + R_FillSpan = R_FillSpan_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_FillColumnHoriz = R_FillColumnHorizP_C; - R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapColoredPlane = R_MapColoredPlane_C; + R_DrawParticle = R_DrawParticle_C; - tmvline1_add = tmvline1_add_C; - tmvline4_add = tmvline4_add_C; - tmvline1_addclamp = tmvline1_addclamp_C; - tmvline4_addclamp = tmvline4_addclamp_C; - tmvline1_subclamp = tmvline1_subclamp_C; - tmvline4_subclamp = tmvline4_subclamp_C; - tmvline1_revsubclamp = tmvline1_revsubclamp_C; - tmvline4_revsubclamp = tmvline4_revsubclamp_C; + tmvline1_add = tmvline1_add_C; + tmvline4_add = tmvline4_add_C; + tmvline1_addclamp = tmvline1_addclamp_C; + tmvline4_addclamp = tmvline4_addclamp_C; + tmvline1_subclamp = tmvline1_subclamp_C; + tmvline4_subclamp = tmvline4_subclamp_C; + tmvline1_revsubclamp = tmvline1_revsubclamp_C; + tmvline4_revsubclamp = tmvline4_revsubclamp_C; #ifdef X86_ASM - rt_copy1col = rt_copy1col_asm; - rt_copy4cols = rt_copy4cols_asm; - rt_map1col = rt_map1col_asm; - rt_shaded4cols = rt_shaded4cols_asm; - rt_add4cols = rt_add4cols_asm; - rt_addclamp4cols = rt_addclamp4cols_asm; + rt_copy1col = rt_copy1col_asm; + rt_copy4cols = rt_copy4cols_asm; + rt_map1col = rt_map1col_asm; + rt_shaded4cols = rt_shaded4cols_asm; + rt_add4cols = rt_add4cols_asm; + rt_addclamp4cols = rt_addclamp4cols_asm; #else - rt_copy1col = rt_copy1col_c; - rt_copy4cols = rt_copy4cols_c; - rt_map1col = rt_map1col_c; - rt_shaded4cols = rt_shaded4cols_c; - rt_add4cols = 
rt_add4cols_c; - rt_addclamp4cols = rt_addclamp4cols_c; -#endif - rt_shaded1col = rt_shaded1col_c; - rt_add1col = rt_add1col_c; - rt_addclamp1col = rt_addclamp1col_c; - rt_subclamp1col = rt_subclamp1col_c; - rt_revsubclamp1col = rt_revsubclamp1col_c; - rt_tlate1col = rt_tlate1col_c; - rt_tlateadd1col = rt_tlateadd1col_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = rt_map4cols_c; - rt_subclamp4cols = rt_subclamp4cols_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_c; - rt_tlate4cols = rt_tlate4cols_c; - rt_tlateadd4cols = rt_tlateadd4cols_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; - rt_initcols = rt_initcols_pal; - + rt_copy1col = rt_copy1col_c; + rt_copy4cols = rt_copy4cols_c; + rt_map1col = rt_map1col_c; + rt_shaded4cols = rt_shaded4cols_c; + rt_add4cols = rt_add4cols_c; + rt_addclamp4cols = rt_addclamp4cols_c; #endif + rt_shaded1col = rt_shaded1col_c; + rt_add1col = rt_add1col_c; + rt_addclamp1col = rt_addclamp1col_c; + rt_subclamp1col = rt_subclamp1col_c; + rt_revsubclamp1col = rt_revsubclamp1col_c; + rt_tlate1col = rt_tlate1col_c; + rt_tlateadd1col = rt_tlateadd1col_c; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; + rt_map4cols = rt_map4cols_c; + rt_subclamp4cols = rt_subclamp4cols_c; + rt_revsubclamp4cols = rt_revsubclamp4cols_c; + rt_tlate4cols = rt_tlate4cols_c; + rt_tlateadd4cols = rt_tlateadd4cols_c; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_c; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; + rt_initcols = rt_initcols_pal; + } } // [RH] Choose column drawers in a single place diff --git a/src/r_main.cpp b/src/r_main.cpp index b7723d07d..d85cd62a0 
100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,6 +103,7 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) +CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) double r_BaseVisibility; double r_WallVisibility; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 14a78d4cd..0cc9045ee 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,6 +187,7 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) +EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -765,11 +766,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { -#ifndef PALETTEOUTPUT - D3DFORMAT FBFormat = D3DFMT_A8R8G8B8; -#else - D3DFORMAT FBFormat = D3DFMT_L8; -#endif + FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1310,20 +1307,45 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (lockrect.Pitch == Pitch * sizeof(canvas_pixel_t) && Pitch == Width) + if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) { - memcpy (lockrect.pBits, MemBuffer, Width * Height * sizeof(canvas_pixel_t)); + if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height * sizeof(uint32_t)); + } + else + { + uint32_t *dest = (uint32_t *)lockrect.pBits; + uint32_t *src = MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width * sizeof(uint32_t)); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } + } + else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + { + if (lockrect.Pitch == Pitch && Pitch == Width) + { + memcpy(lockrect.pBits, MemBuffer, Width * Height); + } + else 
+ { + BYTE *dest = (BYTE *)lockrect.pBits; + BYTE *src = (BYTE *)MemBuffer; + for (int y = 0; y < Height; y++) + { + memcpy(dest, src, Width); + dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); + src += Pitch; + } + } } else { - canvas_pixel_t *dest = (canvas_pixel_t *)lockrect.pBits; - canvas_pixel_t *src = MemBuffer; - for (int y = 0; y < Height; y++) - { - memcpy (dest, src, Width * sizeof(canvas_pixel_t)); - dest = reinterpret_cast(reinterpret_cast(dest) + lockrect.Pitch); - src += Pitch; - } + memset(lockrect.pBits, 0, lockrect.Pitch * Height); } FBTexture->UnlockRect (0); } @@ -1355,11 +1377,10 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); if (copy3d) { FBVERTEX verts[4]; @@ -1377,11 +1398,10 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_SpecialColormap]); -#else - SetPixelShader(Shaders[SHADER_SpecialColormapPal]); -#endif + if (r_swtruecolor) + SetPixelShader(Shaders[SHADER_SpecialColormap]); + else + SetPixelShader(Shaders[SHADER_SpecialColormapPal]); } } else @@ -1392,11 +1412,10 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } -#ifndef PALETTEOUTPUT - SetPixelShader(Shaders[SHADER_NormalColor]); -#else - SetPixelShader(Shaders[SHADER_NormalColorPal]); -#endif + if (r_swtruecolor) + 
SetPixelShader(Shaders[SHADER_NormalColor]); + else + SetPixelShader(Shaders[SHADER_NormalColorPal]); } //========================================================================== diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 73a2c6966..d26765100 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -424,6 +424,7 @@ private: bool NeedPalUpdate; bool NeedGammaUpdate; int FBWidth, FBHeight; + D3DFORMAT FBFormat; bool VSync; RECT BlendingRect; int In2D; diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index 62761a417..001172185 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1780,6 +1780,7 @@ DSPLYMNU_BRIGHTNESS = "Brightness"; DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; +DSPLYMNU_TRUECOLOR = "True color output"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index ff395ff2a..93e33ce79 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -661,6 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" + Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 20b7743ec39088186e49142146d40c43e0cccae5 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 01:49:39 +0200 Subject: [PATCH 004/100] Added R_SetColorMapLight and R_SetDSColorMapLight --- src/f_wipe.cpp | 37 +++++++------ src/r_draw.cpp | 27 ++++++++++ src/r_draw.h | 6 +++ src/r_main.h | 4 -- src/r_plane.cpp | 9 +--- src/r_segs.cpp | 90 ++++++++++--------------------- src/r_things.cpp | 24 ++------- src/textures/canvastexture.cpp | 9 ++-- 
src/v_draw.cpp | 49 +++++++++-------- src/v_video.cpp | 98 ++++++++++++++++++---------------- 10 files changed, 166 insertions(+), 187 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index c6f20cadb..a86f93fc4 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,6 +33,8 @@ // SCREEN WIPE PACKAGE // +EXTERN_CVAR(Bool, r_swtruecolor) + static int CurrentWipeType; static short *wipe_scr_start; @@ -77,10 +79,8 @@ bool wipe_initMelt (int ticks) { int i, r; -#ifdef PALETTEOUTPUT // copy start screen to main screen - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -301,9 +301,6 @@ bool wipe_doBurn (int ticks) } else { -#ifndef PALETTEOUTPUT - // TO DO: RGB32k.All -#else int bglevel = 64-fglevel; DWORD *fg2rgb = Col2RGB8[fglevel]; DWORD *bg2rgb = Col2RGB8[bglevel]; @@ -311,7 +308,6 @@ bool wipe_doBurn (int ticks) DWORD bg = bg2rgb[fromold[x]]; fg = (fg+bg) | 0x1f07c1f; to[x] = RGB32k.All[fg & (fg>>15)]; -#endif done = false; } } @@ -342,9 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { -#ifdef PALETTEOUTPUT - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); -#endif + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); return true; } else @@ -391,14 +385,15 @@ static bool (*wipes[])(int) = // Returns true if the wipe should be performed. 
bool wipe_StartScreen (int type) { + if (r_swtruecolor) + return false; + CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); return true; } return false; @@ -406,13 +401,15 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { + if (r_swtruecolor) + return; + if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; -#ifdef PALETTEOUTPUT - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. -#endif + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. 
+ // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); } @@ -423,6 +420,9 @@ bool wipe_ScreenWipe (int ticks) { bool rc; + if (r_swtruecolor) + return true; + if (CurrentWipeType == wipe_None) return true; @@ -436,6 +436,9 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { + if (r_swtruecolor) + return; + if (wipe_scr_start != NULL) { delete[] wipe_scr_start; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83a4472f3..cd34a71b4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4601,3 +4601,30 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } +void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + dc_colormap = basecolormapdata; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +{ + if (r_swtruecolor) + { + ds_colormap = basecolormapdata; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_light = 0; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index 17698c360..db109dbee 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -386,4 +386,10 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ // transmaskwallscan is like maskwallscan, but it can also blend to the background void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); +// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) +void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); + +// Same as R_SetColorMapLight, but for ds_colormap and ds_light +void R_SetDSColorMapLight(BYTE *base_colormap, float 
light, int shade); + #endif diff --git a/src/r_main.h b/src/r_main.h index 37a41a763..c1034ea3e 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -86,8 +86,6 @@ extern bool r_dontmaplines; // This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap #define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) -#ifndef PALETTEOUTPUT - // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { @@ -108,8 +106,6 @@ inline uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } -#endif - extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c8258a1ba..8d0c882ba 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,14 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. -#ifndef PALETTEOUTPUT - ds_colormap = basecolormap->Maps; - ds_light = LIGHTSCALE(GlobVis * fabs(CenterY - y), planeshade); -#else - ds_colormap = basecolormap->Maps + (GETPALOOKUP ( - GlobVis * fabs(CenterY - y), planeshade) << COLORMAPSHIFT); - ds_light = 0; -#endif + R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM diff --git a/src/r_segs.cpp b/src/r_segs.cpp index fb27a99de..548cd994f 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,6 +61,8 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) +EXTERN_CVAR(Bool, r_swtruecolor) + #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) @@ -1138,13 +1140,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << 
COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1184,13 +1180,16 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(12/*light*/, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1245,13 +1244,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,13 +1683,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1734,12 +1721,15 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f for (z = 0; z < 4; ++z) { light += rw_lightstep; -#ifndef PALETTEOUTPUT - palookupoffse[z] = basecolormapdata; - 
palookuplight[z] = LIGHTSCALE(light, wallshade); -#else - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); -#endif + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } } } @@ -1795,13 +1785,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, wallshade); -#else - dc_colormap = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -3295,13 +3279,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3311,13 +3289,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3332,13 +3304,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting -#ifndef 
PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, wallshade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_things.cpp b/src/r_things.cpp index a6f6aea28..22538bd40 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -594,13 +594,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -611,13 +605,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -633,13 +621,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting -#ifndef PALETTEOUTPUT - dc_colormap = usecolormap->Maps; - dc_light = LIGHTSCALE(rw_light, shade); -#else - dc_colormap = usecolormap->Maps + (GETPALOOKUP (rw_light, shade) << COLORMAPSHIFT); - dc_light = FLOAT2FIXED(MAXLIGHTVIS); -#endif + R_SetColorMapLight(usecolormap->Maps, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7388c1306..d1f70439f 100644 --- 
a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -106,10 +106,7 @@ void FCanvasTexture::MakeTexture () Canvas = new DSimpleCanvas (Width, Height); Canvas->Lock (); GC::AddSoftRoot(Canvas); -#ifndef PALETTEOUTPUT - Pixels = new BYTE[Width*Height]; - bPixelsAllocated = true; -#else + if (Width != Height || Width != Canvas->GetPitch()) { Pixels = new BYTE[Width*Height]; @@ -117,10 +114,10 @@ void FCanvasTexture::MakeTexture () } else { - Pixels = Canvas->GetBuffer(); + Pixels = (BYTE*)Canvas->GetBuffer(); bPixelsAllocated = false; } -#endif + // Draw a special "unrendered" initial texture into the buffer. memset (Pixels, 0, Width*Height/2); memset (Pixels+Width*Height/2, 255, Width*Height/2); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 8853fc947..984375f25 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,6 +77,8 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); +EXTERN_CVAR(Bool, r_swtruecolor) + // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. 
static int LastPal = -1; @@ -1017,32 +1019,35 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } -#ifndef PALETTEOUTPUT - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + if (r_swtruecolor) + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red + 1) / 2; + uint32_t green = (fg_green + bg_green + 1) / 2; + uint32_t blue = (fg_blue + bg_blue + 1) / 2; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; -#else - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; - DWORD *bg2rgb = Col2RGB8[1+level]; - DWORD *fg2rgb = Col2RGB8[63-level]; - DWORD fg = fg2rgb[basecolor]; - DWORD bg = bg2rgb[*spot]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; -#endif + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + } + else + { + canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + DWORD *bg2rgb = Col2RGB8[1+level]; + DWORD *fg2rgb = Col2RGB8[63-level]; + DWORD fg = fg2rgb[basecolor]; + DWORD bg = bg2rgb[*spot]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + } } void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 realcolor) diff --git a/src/v_video.cpp 
b/src/v_video.cpp index b6a626753..2fb46e88a 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,6 +65,7 @@ #include "menu/menu.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_swtruecolor) FRenderer *Renderer; @@ -367,65 +368,68 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; -#ifndef PALETTEOUTPUT - uint32_t fg = color.d; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - for (y = h; y != 0; y--) + if (r_swtruecolor) { - for (x = w; x != 0; x--) + uint32_t fg = color.d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = (uint32_t)clamp(damount * 256 + 0.5f, 0.0f, 256.0f); + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + for (y = h; y != 0; y--) { - uint32_t bg_red = (*spot >> 16) & 0xff; - uint32_t bg_green = (*spot >> 8) & 0xff; - uint32_t bg_blue = (*spot) & 0xff; + for (x = w; x != 0; x--) + { + uint32_t bg_red = (*spot >> 16) & 0xff; + uint32_t bg_green = (*spot >> 8) & 0xff; + uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - *spot = 0xff000000 | (red << 16) | (green << 8) | blue; - spot++; + *spot = 0xff000000 | (red << 16) | (green << 8) | blue; + spot++; + } + spot += gap; } - spot += gap; } -#else - DWORD *bg2rgb; - DWORD fg; - + else { - int amount; + DWORD *bg2rgb; + DWORD fg; 
- amount = (int)(damount * 64); - bg2rgb = Col2RGB8[64-amount]; - - fg = (((color.r * amount) >> 4) << 20) | - ((color.g * amount) >> 4) | - (((color.b * amount) >> 4) << 10); - } - - for (y = h; y != 0; y--) - { - for (x = w; x != 0; x--) { - DWORD bg; + int amount; - bg = bg2rgb[(*spot)&0xff]; - bg = (fg+bg) | 0x1f07c1f; - *spot = RGB32k.All[bg&(bg>>15)]; - spot++; + amount = (int)(damount * 64); + bg2rgb = Col2RGB8[64-amount]; + + fg = (((color.r * amount) >> 4) << 20) | + ((color.g * amount) >> 4) | + (((color.b * amount) >> 4) << 10); + } + + for (y = h; y != 0; y--) + { + for (x = w; x != 0; x--) + { + DWORD bg; + + bg = bg2rgb[(*spot)&0xff]; + bg = (fg+bg) | 0x1f07c1f; + *spot = RGB32k.All[bg&(bg>>15)]; + spot++; + } + spot += gap; } - spot += gap; } -#endif } //========================================================================== From 045bad1b5287d75f2c9f3d84e4a6cc2975499c18 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 05:31:32 +0200 Subject: [PATCH 005/100] Removed the need for the pixel_canvas_t typedef --- src/basictypes.h | 6 -- src/f_wipe.cpp | 14 +-- src/m_misc.cpp | 2 - src/r_draw.cpp | 234 ++++++++++++++++++++--------------------- src/r_draw.h | 12 +-- src/r_drawt.cpp | 71 +++++++------ src/r_drawt_rgba.cpp | 94 ++++++++--------- src/r_main.cpp | 34 ++++-- src/r_plane.cpp | 130 ++++++++++++++++++++++- src/r_plane.h | 4 + src/r_segs.cpp | 93 ++++++++++------ src/r_things.cpp | 23 ++-- src/v_draw.cpp | 112 +++++++++++++++----- src/v_video.cpp | 35 ++++-- src/v_video.h | 14 +-- src/win32/fb_d3d9.cpp | 6 +- src/win32/fb_ddraw.cpp | 4 +- src/win32/win32iface.h | 2 +- 18 files changed, 561 insertions(+), 329 deletions(-) diff --git a/src/basictypes.h b/src/basictypes.h index 45e33a4a7..ff2cd972e 100644 --- a/src/basictypes.h +++ b/src/basictypes.h @@ -66,12 +66,6 @@ union QWORD_UNION typedef SDWORD fixed_t; typedef DWORD dsfixed_t; // fixedpt used by span drawer -#ifndef PALETTEOUTPUT -typedef uint32_t canvas_pixel_t; -#else 
-typedef BYTE canvas_pixel_t; -#endif - #define FIXED_MAX (signed)(0x7fffffff) #define FIXED_MIN (signed)(0x80000000) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index a86f93fc4..7e1ec678e 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -80,7 +80,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); + screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -273,7 +273,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - canvas_pixel_t *to; + BYTE *to; BYTE *fromold, *fromnew; const int SHIFT = 16; @@ -338,7 +338,7 @@ bool wipe_doFade (int ticks) fade += ticks * 2; if (fade > 64) { - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); return true; } else @@ -349,7 +349,7 @@ bool wipe_doFade (int ticks) DWORD *bg2rgb = Col2RGB8[bglevel]; BYTE *fromnew = (BYTE *)wipe_scr_end; BYTE *fromold = (BYTE *)wipe_scr_start; - canvas_pixel_t *to = screen->GetBuffer(); + BYTE *to = screen->GetBuffer(); for (y = 0; y < SCREENHEIGHT; y++) { @@ -393,7 +393,7 @@ bool wipe_StartScreen (int type) if (CurrentWipeType) { wipe_scr_start = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); + screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); return true; } return false; @@ -407,8 +407,8 @@ void wipe_EndScreen (void) if (CurrentWipeType) { wipe_scr_end = new short[SCREENWIDTH * SCREENHEIGHT / 2]; - screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_end); - screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (canvas_pixel_t *)wipe_scr_start); // restore start scr. 
+ screen->GetBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_end); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // restore start scr. // Initialize the wipe (*wipes[(CurrentWipeType-1)*3])(0); diff --git a/src/m_misc.cpp b/src/m_misc.cpp index 79416c31d..87f61f253 100644 --- a/src/m_misc.cpp +++ b/src/m_misc.cpp @@ -655,7 +655,6 @@ static bool FindFreeName (FString &fullname, const char *extension) void M_ScreenShot (const char *filename) { -#ifdef PALETTEOUTPUT FILE *file; FString autoname; bool writepcx = (stricmp (screenshot_type, "pcx") == 0); // PNG is the default @@ -744,7 +743,6 @@ void M_ScreenShot (const char *filename) Printf ("Could not create screenshot.\n"); } } -#endif } CCMD (screenshot) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index cd34a71b4..f939406bb 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -65,7 +65,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -canvas_pixel_t *dc_destorg; +BYTE* dc_destorg; } int scaledviewwidth; @@ -99,6 +99,7 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); +void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); fixed_t (*tmvline1_add)(); @@ -133,7 +134,7 @@ void (*rt_tlateadd4cols)(int sx, int yl, int yh); void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -void (*rt_initcols)(canvas_pixel_t *buffer); +void (*rt_initcols)(BYTE *buffer); // // R_DrawColumn @@ -158,7 +159,7 @@ DWORD *dc_destblend; // blending lookups // first pixel in a column (possibly virtual) const BYTE* dc_source; -canvas_pixel_t* dc_dest; +BYTE* dc_dest; int dc_count; DWORD vplce[4]; @@ -236,7 +237,7 @@ void R_InitShadeMaps() void R_DrawColumnP_C (void) { int count; 
- canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -281,7 +282,7 @@ void R_DrawColumnP_C (void) void R_DrawColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -292,7 +293,7 @@ void R_DrawColumnP_RGBA_C() return; // Framebuffer destination address. - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -328,7 +329,7 @@ void R_DrawColumnP_RGBA_C() void R_FillColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; count = dc_count; @@ -352,14 +353,14 @@ void R_FillColumnP_C (void) void R_FillColumnP_RGBA() { int count; - canvas_pixel_t* dest; + uint32_t* dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); @@ -378,7 +379,7 @@ void R_FillColumnP_RGBA() void R_FillAddColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -405,13 +406,13 @@ void R_FillAddColumn_C (void) void R_FillAddColumn_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -436,7 +437,7 @@ void R_FillAddColumn_RGBA_C() void R_FillAddClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -469,13 +470,13 @@ void R_FillAddClampColumn_C (void) void R_FillAddClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -500,7 +501,7 @@ void R_FillAddClampColumn_RGBA() void R_FillSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -532,13 +533,13 @@ void R_FillSubClampColumn_C (void) void 
R_FillSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -563,7 +564,7 @@ void R_FillSubClampColumn_RGBA() void R_FillRevSubClampColumn_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; count = dc_count; if (count <= 0) @@ -595,13 +596,13 @@ void R_FillRevSubClampColumn_C (void) void R_FillRevSubClampColumn_RGBA() { int count; - canvas_pixel_t *dest; + uint32_t *dest; count = dc_count; if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; int pitch = dc_pitch; uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; @@ -673,7 +674,7 @@ void R_InitFuzzTable (int fuzzoff) void R_DrawFuzzColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -745,7 +746,7 @@ void R_DrawFuzzColumnP_C (void) void R_DrawFuzzColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; // Adjust borders. Low... if (dc_yl == 0) @@ -763,7 +764,7 @@ void R_DrawFuzzColumnP_RGBA_C() count++; - dest = ylookup[dc_yl] + dc_x + dc_destorg; + dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -890,7 +891,7 @@ algorithm that uses RGB tables. 
void R_DrawAddColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -928,7 +929,7 @@ void R_DrawAddColumnP_C (void) void R_DrawAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -936,7 +937,7 @@ void R_DrawAddColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -980,7 +981,7 @@ void R_DrawAddColumnP_RGBA_C() void R_DrawTranslatedColumnP_C (void) { int count; - canvas_pixel_t* dest; + BYTE* dest; fixed_t frac; fixed_t fracstep; @@ -1012,7 +1013,7 @@ void R_DrawTranslatedColumnP_C (void) void R_DrawTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t* dest; + uint32_t* dest; fixed_t frac; fixed_t fracstep; @@ -1022,7 +1023,7 @@ void R_DrawTranslatedColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1047,7 +1048,7 @@ void R_DrawTranslatedColumnP_RGBA_C() void R_DrawTlatedAddColumnP_C() { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1086,7 +1087,7 @@ void R_DrawTlatedAddColumnP_C() void R_DrawTlatedAddColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1096,7 +1097,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() uint32_t light = calc_light_multiplier(dc_light); - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1135,7 +1136,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() void R_DrawShadedColumnP_C (void) { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac, fracstep; count = dc_count; @@ -1170,7 +1171,7 @@ void R_DrawShadedColumnP_C (void) void R_DrawShadedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac, fracstep; count = dc_count; @@ -1178,7 +1179,7 @@ void R_DrawShadedColumnP_RGBA_C() if 
(count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1217,7 +1218,7 @@ void R_DrawShadedColumnP_RGBA_C() void R_DrawAddClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1234,24 +1235,20 @@ void R_DrawAddClampColumnP_C () const BYTE *source = dc_source; BYTE *colormap = dc_colormap; int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; + DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD b = a; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + a |= 0x01f07c1f; + b &= 0x40100400; + a &= 0x3fffffff; + b = b - (b >> 5); + a |= b; + *dest = RGB32k.All[a & (a >> 15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1261,7 +1258,7 @@ void R_DrawAddClampColumnP_C () void R_DrawAddClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1269,7 +1266,7 @@ void R_DrawAddClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1306,7 +1303,7 @@ void R_DrawAddClampColumnP_RGBA_C() void R_DrawAddClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1347,7 +1344,7 @@ void R_DrawAddClampTranslatedColumnP_C () void R_DrawAddClampTranslatedColumnP_RGBA_C() { int count; - 
canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1355,7 +1352,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1393,7 +1390,7 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() void R_DrawSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1432,7 +1429,7 @@ void R_DrawSubClampColumnP_C () void R_DrawSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1440,7 +1437,7 @@ void R_DrawSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1477,7 +1474,7 @@ void R_DrawSubClampColumnP_RGBA_C() void R_DrawSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1517,7 +1514,7 @@ void R_DrawSubClampTranslatedColumnP_C () void R_DrawSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1525,7 +1522,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1563,7 +1560,7 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() void R_DrawRevSubClampColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1602,7 +1599,7 @@ void R_DrawRevSubClampColumnP_C () void R_DrawRevSubClampColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1610,7 +1607,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1647,7 +1644,7 @@ void R_DrawRevSubClampColumnP_RGBA_C() void 
R_DrawRevSubClampTranslatedColumnP_C () { int count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t frac; fixed_t fracstep; @@ -1687,7 +1684,7 @@ void R_DrawRevSubClampTranslatedColumnP_C () void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { int count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t frac; fixed_t fracstep; @@ -1695,7 +1692,7 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() if (count <= 0) return; - dest = dc_dest; + dest = (uint32_t*)dc_dest; fracstep = dc_iscale; frac = dc_texturefrac; @@ -1855,7 +1852,7 @@ void R_DrawSpanP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1927,7 +1924,7 @@ void R_DrawSpanP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -1945,7 +1942,7 @@ void R_DrawSpanP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2051,7 +2048,7 @@ void R_DrawSpanMaskedP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2114,7 +2111,7 @@ void R_DrawSpanMaskedP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2125,7 +2122,7 @@ void R_DrawSpanMaskedP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2179,7 +2176,7 @@ void R_DrawSpanTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = 
ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2241,7 +2238,7 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2252,7 +2249,7 @@ void R_DrawSpanTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2323,7 +2320,7 @@ void R_DrawSpanMaskedTranslucentP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2399,7 +2396,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2412,7 +2409,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2491,7 +2488,7 @@ void R_DrawSpanAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2561,7 +2558,7 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2574,7 +2571,7 @@ void R_DrawSpanAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2643,7 +2640,7 @@ void R_DrawSpanMaskedAddClampP_C (void) dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - 
canvas_pixel_t* dest; + BYTE* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2725,7 +2722,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t yfrac; dsfixed_t xstep; dsfixed_t ystep; - canvas_pixel_t* dest; + uint32_t* dest; const BYTE* source = ds_source; const BYTE* colormap = ds_colormap; int count; @@ -2738,7 +2735,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() xfrac = ds_xfrac; yfrac = ds_yfrac; - dest = ylookup[ds_y] + ds_x1 + dc_destorg; + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; count = ds_x2 - ds_x1 + 1; @@ -2819,7 +2816,7 @@ void R_FillSpan_C (void) void R_FillSpan_RGBA() { - canvas_pixel_t *dest = ylookup[ds_y] + ds_x1 + dc_destorg; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); @@ -2843,7 +2840,7 @@ extern "C" void R_SetupDrawSlabC(const BYTE *colormap) slabcolormap = colormap; } -extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p) +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) { int x; const BYTE *colormap = slabcolormap; @@ -3017,7 +3014,7 @@ DWORD vlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3039,7 +3036,7 @@ DWORD vlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = vlinebits; int pitch = dc_pitch; @@ -3058,7 +3055,7 @@ DWORD vlinec1_RGBA() #if !defined(X86_ASM) void vlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = vlinebits; DWORD place; @@ -3076,7 +3073,7 @@ void vlinec4 () void vlinec4_RGBA() { - 
canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; @@ -3162,7 +3159,7 @@ DWORD mvlinec1 () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3188,7 +3185,7 @@ DWORD mvlinec1_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = mvlinebits; int pitch = dc_pitch; @@ -3211,7 +3208,7 @@ DWORD mvlinec1_RGBA() #if !defined(X86_ASM) void mvlinec4 () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3230,7 +3227,7 @@ void mvlinec4 () void mvlinec4_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = mvlinebits; DWORD place; @@ -3260,7 +3257,7 @@ extern int wallshade; static void R_DrawFogBoundarySection (int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; for (; y < y2; ++y) { @@ -3278,7 +3275,7 @@ static void R_DrawFogBoundaryLine (int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + BYTE *dest = ylookup[y] + dc_destorg; do { @@ -3380,7 +3377,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) { BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3400,7 +3397,7 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) { int x2 = spanend[y]; BYTE *colormap = dc_colormap; - canvas_pixel_t *dest = ylookup[y] + dc_destorg; + uint32_t *dest = 
ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); @@ -3518,7 +3515,7 @@ fixed_t tmvline1_add_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3551,7 +3548,7 @@ fixed_t tmvline1_add_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3589,7 +3586,7 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3622,13 +3619,10 @@ void tmvline4_add_C () void tmvline4_add_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); @@ -3670,7 +3664,7 @@ fixed_t tmvline1_addclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3708,7 +3702,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3746,7 +3740,7 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3778,7 +3772,7 @@ void tmvline4_addclamp_C () void tmvline4_addclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; 
int bits = tmvlinebits; @@ -3823,7 +3817,7 @@ fixed_t tmvline1_subclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3858,7 +3852,7 @@ fixed_t tmvline1_subclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -3893,7 +3887,7 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3924,7 +3918,7 @@ void tmvline4_subclamp_C () void tmvline4_subclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -3969,7 +3963,7 @@ fixed_t tmvline1_revsubclamp_C () BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4004,7 +3998,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int bits = tmvlinebits; int pitch = dc_pitch; @@ -4039,7 +4033,7 @@ fixed_t tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_C () { - canvas_pixel_t *dest = dc_dest; + BYTE *dest = dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4070,7 +4064,7 @@ void tmvline4_revsubclamp_C () void tmvline4_revsubclamp_RGBA() { - canvas_pixel_t *dest = dc_dest; + uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = tmvlinebits; @@ -4168,6 +4162,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; + R_MapTiltedPlane = 
R_MapColoredPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -4262,6 +4257,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; + R_MapTiltedPlane = R_MapColoredPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_draw.h b/src/r_draw.h index db109dbee..2348914b6 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -45,7 +45,7 @@ extern "C" DWORD *dc_destblend; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" canvas_pixel_t *dc_dest, *dc_destorg; +extern "C" BYTE* dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -55,7 +55,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" canvas_pixel_t *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; @@ -228,13 +228,13 @@ extern void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); -extern void (*rt_initcols)(canvas_pixel_t *buffer); +extern void (*rt_initcols)(BYTE *buffer); void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
-void rt_initcols_pal (canvas_pixel_t *buffer); -void rt_initcols_rgba (canvas_pixel_t *buffer); +void rt_initcols_pal (BYTE *buffer); +void rt_initcols_rgba (BYTE *buffer); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -313,7 +313,7 @@ void R_FillSpan_RGBA_C(void); #endif extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, canvas_pixel_t *p); +extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 9520f59b3..485ed7ab3 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,6 +47,8 @@ #include "r_things.h" #include "v_video.h" +EXTERN_CVAR(Bool, r_swtruecolor) + // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. @@ -57,8 +59,8 @@ // dc_ctspan is advanced while drawing into dc_temp. // horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. -canvas_pixel_t dc_tempbuff[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp; +BYTE dc_tempbuff[MAXHEIGHT*4]; +BYTE *dc_temp; unsigned int dc_tspans[4][MAXHEIGHT]; unsigned int *dc_ctspan[4]; unsigned int *horizspan[4]; @@ -73,8 +75,8 @@ extern "C" void R_SetupAddClampCol(); // Copies one span at hx to the screen at sx. 
void rt_copy1col_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -148,8 +150,8 @@ void rt_copy4cols_c (int sx, int yl, int yh) void rt_map1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -183,8 +185,8 @@ void rt_map1col_c (int hx, int sx, int yl, int yh) void rt_map4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -227,7 +229,7 @@ void rt_map4cols_c (int sx, int yl, int yh) void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4 + hx]; + BYTE *source = &dc_temp[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. Parallelism is explicitly spelled out by using a separate @@ -274,7 +276,7 @@ void rt_Translate1col(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp[yl*4]; + BYTE *source = &dc_temp[yl*4]; int c0, c1; BYTE b0, b1; @@ -330,8 +332,8 @@ void rt_tlate4cols_c (int sx, int yl, int yh) void rt_add1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -364,8 +366,8 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) void rt_add4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -435,8 +437,8 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) void rt_shaded1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int 
pitch; @@ -467,8 +469,8 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) void rt_shaded4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -513,8 +515,8 @@ void rt_shaded4cols_c (int sx, int yl, int yh) void rt_addclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -550,8 +552,8 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -629,8 +631,8 @@ void rt_tlateaddclamp4cols_c (int sx, int yl, int yh) void rt_subclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -664,8 +666,8 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -738,8 +740,8 @@ void rt_tlatesubclamp4cols_c (int sx, int yl, int yh) void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -773,8 +775,8 @@ void rt_revsubclamp1col_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + BYTE *source; + BYTE *dest; int count; int pitch; @@ -1007,7 +1009,7 @@ void rt_draw4cols (int sx) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. 
-void rt_initcols_pal (canvas_pixel_t *buff) +void rt_initcols_pal (BYTE *buff) { int y; @@ -1021,7 +1023,7 @@ void rt_initcols_pal (canvas_pixel_t *buff) void R_DrawColumnHorizP_C (void) { int count = dc_count; - canvas_pixel_t *dest; + BYTE *dest; fixed_t fracstep; fixed_t frac; @@ -1082,7 +1084,7 @@ void R_FillColumnHorizP_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + BYTE *dest; if (count <= 0) return; @@ -1113,6 +1115,7 @@ void R_FillColumnHorizP_C (void) void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { @@ -1182,7 +1185,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; hcolfunc_pre (); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e8111be8f..872cb4b89 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,8 +43,8 @@ #include "r_things.h" #include "v_video.h" -canvas_pixel_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -canvas_pixel_t *dc_temp_rgba; +uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; +uint32_t *dc_temp_rgba; // Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; @@ -54,8 +54,8 @@ extern unsigned int *horizspan[4]; // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -64,7 +64,7 @@ void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -106,8 +106,8 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -119,7 +119,7 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -143,8 +143,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_map4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -156,7 +156,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -188,7 +188,7 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4 + hx]; + uint32_t *source = &dc_temp_rgba[yl*4 + hx]; // Things we do to hit the compiler's optimizer with a clue bat: // 1. 
Parallelism is explicitly spelled out by using a separate @@ -235,7 +235,7 @@ void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { int count = yh - yl + 1; - canvas_pixel_t *source = &dc_temp_rgba[yl*4]; + uint32_t *source = &dc_temp_rgba[yl*4]; int c0, c1; BYTE b0, b1; @@ -291,8 +291,8 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -301,7 +301,7 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -333,8 +333,8 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_add4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -343,7 +343,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -392,8 +392,8 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -403,7 +403,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -434,8 +434,8 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) void 
rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -445,7 +445,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -479,8 +479,8 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -489,7 +489,7 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -520,8 +520,8 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -530,7 +530,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -578,8 +578,8 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -588,7 +588,7 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ 
-619,8 +619,8 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -629,7 +629,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -678,8 +678,8 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -690,7 +690,7 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; @@ -721,8 +721,8 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { BYTE *colormap; - canvas_pixel_t *source; - canvas_pixel_t *dest; + uint32_t *source; + uint32_t *dest; int count; int pitch; @@ -733,7 +733,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - dest = ylookup[yl] + sx + dc_destorg; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; colormap = dc_colormap; @@ -780,11 +780,11 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // Before each pass through a rendering loop that uses these routines, // call this function to set up the span pointers. -void rt_initcols_rgba (canvas_pixel_t *buff) +void rt_initcols_rgba (BYTE *buff) { int y; - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : buff; + dc_temp_rgba = buff == NULL ? dc_temp_rgbabuff_rgba : (uint32_t*)buff; for (y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } @@ -794,7 +794,7 @@ void rt_initcols_rgba (canvas_pixel_t *buff) void R_DrawColumnHorizP_RGBA_C (void) { int count = dc_count; - canvas_pixel_t *dest; + uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -855,7 +855,7 @@ void R_FillColumnHorizP_RGBA_C (void) { int count = dc_count; BYTE color = dc_color; - canvas_pixel_t *dest; + uint32_t *dest; if (count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index d85cd62a0..9dc61eea3 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -577,9 +577,12 @@ void R_HighlightPortal (PortalDrawseg* pds) // [ZZ] NO OVERFLOW CHECKS HERE // I believe it won't break. if it does, blame me. :( + if (r_swtruecolor) // Assuming this is just a debug function + return; + BYTE color = (BYTE)BestColor((DWORD *)GPalette.BaseColors, 255, 0, 0, 0, 255); - canvas_pixel_t* pixels = RenderTarget->GetBuffer(); + BYTE* pixels = RenderTarget->GetBuffer(); // top edge for (int x = pds->x1; x < pds->x2; x++) { @@ -624,12 +627,26 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) int Ytop = pds->ceilingclip[x-pds->x1]; int Ybottom = pds->floorclip[x-pds->x1]; - canvas_pixel_t *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; - - for (int y = Ytop; y <= Ybottom; y++) + if (r_swtruecolor) { - *dest = color; - dest += spacing; + uint32_t *dest = (uint32_t*)RenderTarget->GetBuffer() + x + Ytop * spacing; + + uint32_t c = GPalette.BaseColors[color].d; + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = c; + dest += spacing; + } + } + else + { + BYTE *dest = RenderTarget->GetBuffer() + x + Ytop * spacing; + + for (int y = Ytop; y <= Ybottom; y++) + { + *dest = color; + dest += spacing; + } } } @@ -795,10 +812,11 @@ void R_EnterPortal (PortalDrawseg* pds, int depth) void R_SetupBuffer () { - static canvas_pixel_t *lastbuff = NULL; + static BYTE *lastbuff = NULL; 
int pitch = RenderTarget->GetPitch(); - canvas_pixel_t *lineptr = RenderTarget->GetBuffer() + viewwindowy*pitch + viewwindowx; + int pixelsize = r_swtruecolor ? 4 : 1; + BYTE *lineptr = RenderTarget->GetBuffer() + (viewwindowy*pitch + viewwindowx) * pixelsize; if (dc_pitch != pitch || lineptr != lastbuff) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8d0c882ba..a71590c9d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -354,12 +354,12 @@ void R_CalcTiltedLighting (double lval, double lend, int width) // //========================================================================== -void R_MapTiltedPlane (int y, int x1) +void R_MapTiltedPlane_C (int y, int x1) { int x2 = spanend[y]; int width = x2 - x1; double iz, uz, vz; - canvas_pixel_t *fb; + BYTE *fb; DWORD u, v; int i; @@ -478,6 +478,130 @@ void R_MapTiltedPlane (int y, int x1) #endif } +void R_MapTiltedPlane_RGBA (int y, int x1) +{ + int x2 = spanend[y]; + int width = x2 - x1; + double iz, uz, vz; + uint32_t *fb; + DWORD u, v; + int i; + + iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (plane_shade) + { + uz = (iz + plane_sz[0]*width) * planelightfloat; + vz = iz * planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); + vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. + // Use it only to see how things are supposed to look. 
+ i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + pviewx; + v = SQWORD(vz*z) + pviewy; + ds_colormap = tiltlighting[i]; + ds_light = 0; + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += plane_sz[0]; + uz += plane_su[0]; + vz += plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = plane_sz[0] * SPANSIZE; + uzstep = plane_su[0] * SPANSIZE; + vzstep = plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += plane_sz[0] * left; + uz += plane_su[0] * left; + vz += plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + pviewx; + v = SQWORD(startv) + pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + 
//========================================================================== // // R_MapColoredPlane @@ -491,7 +615,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_RGBA(int y, int x1) { - canvas_pixel_t *dest = ylookup[y] + x1 + dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); uint32_t color = shade_pal_index(ds_color, light); diff --git a/src/r_plane.h b/src/r_plane.h index ac63501e3..7505ac995 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -94,6 +94,10 @@ void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); extern void(*R_MapColoredPlane)(int y, int x1); +extern void(*R_MapTiltedPlane)(int y, int x1); + +void R_MapTiltedPlane_C(int y, int x1); +void R_MapTiltedPlane_RGBA(int y, int x); void R_MapColoredPlane_C(int y, int x1); void R_MapColoredPlane_RGBA(int y, int x1); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 548cd994f..8c71f0fb7 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1071,7 +1071,7 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1107,6 +1107,8 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l //extern cycle_t WallScanCycles; //clock (WallScanCycles); + int pixelsize = r_swtruecolor ? 
4 : 1; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setupvline(fracbits); @@ -1144,7 +1146,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1202,7 +1204,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (!(bad & 1)) { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } bad >>= 1; } @@ -1213,23 +1215,23 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l { if (u4 > y1ve[z]) { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+x+z+dc_destorg); + vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+x+dc_destorg; + dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; dovline4(); } - canvas_pixel_t *i = x+ylookup[d4]+dc_destorg; + BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1248,7 +1250,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + x + dc_destorg; + 
dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1435,7 +1437,7 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; @@ -1451,7 +1453,8 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1473,6 +1476,9 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 
2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32- rw_pic->HeightBits; setupmvline(fracbits); @@ -1480,7 +1486,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ basecolormapdata = basecolormap->Maps; x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1489,9 +1495,13 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookupoffse[1] = dc_colormap; palookupoffse[2] = dc_colormap; palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p/sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1514,7 +1524,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ domvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1539,7 +1549,16 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ for (z = 0; z < 4; ++z) { light += rw_lightstep; - palookupoffse[z] = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); + if (r_swtruecolor) + { + palookupoffse[z] = basecolormapdata; + palookuplight[z] = LIGHTSCALE(light, wallshade); + } + else + { + palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << 
COLORMAPSHIFT); + palookuplight[z] = 0; + } } } @@ -1552,7 +1571,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1563,27 +1582,27 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; domvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1597,7 +1616,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1611,7 +1630,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ NetUpdate (); } -inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, canvas_pixel_t *dest) +inline void preptmvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { 
dc_iscale = vince; dc_colormap = colormap; @@ -1628,7 +1647,8 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t (*tmvline1)(); void (*tmvline4)(); int x, fracbits; - canvas_pixel_t *p; + BYTE *pixel; + int pixelsize, pixelshift; int y1ve[4], y2ve[4], u4, d4, startx, dax, z; char bad; float light = rw_light - rw_lightstep; @@ -1651,6 +1671,9 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f //extern cycle_t WallScanCycles; //clock (WallScanCycles); + pixelsize = r_swtruecolor ? 4 : 1; + pixelshift = r_swtruecolor ? 2 : 0; + rw_pic->GetHeight(); // Make sure texture size is loaded fracbits = 32 - rw_pic->HeightBits; setuptmvline(fracbits); @@ -1659,7 +1682,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f fixed_t centeryfrac = FLOAT2FIXED(CenterY); x = startx = x1; - p = x + dc_destorg; + pixel = x * pixelsize + dc_destorg; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1674,7 +1697,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } - for(; (x < x2) && (((size_t)p / sizeof(canvas_pixel_t)) & 3); ++x, ++p) + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x];//max(uwal[x],umost[x]); @@ -1687,7 +1710,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); @@ -1696,7 +1719,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f tmvline1(); } - for(; x < x2-3; x += 4, p+= 4) + for(; x < x2-3; x += 4, pixel += 4 * pixelsize) { bad = 0; for (z = 3, dax = x+3; z >= 0; --z, --dax) @@ -1742,7 +1765,7 @@ void 
transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (!(bad & 1)) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); tmvline1(); } bad >>= 1; @@ -1754,7 +1777,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f { if (u4 > y1ve[z]) { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],ylookup[y1ve[z]]+p+z); + preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); vplce[z] = tmvline1(); } } @@ -1762,21 +1785,21 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (d4 > u4) { dc_count = d4-u4; - dc_dest = ylookup[u4]+p; + dc_dest = ylookup[u4]*pixelsize+pixel; tmvline4(); } - canvas_pixel_t *i = p+ylookup[d4]; + BYTE *i = pixel+ylookup[d4]*pixelsize; for (z = 0; z < 4; ++z) { if (y2ve[z] > d4) { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z); + preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); tmvline1(); } } } - for(; x < x2; ++x, ++p) + for(; x < x2; ++x, pixel += pixelsize) { light += rw_lightstep; y1ve[0] = uwal[x]; @@ -1789,7 +1812,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] + p; + dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; dc_count = y2ve[0] - y1ve[0]; iscale = swal[x] * yrepeat; dc_iscale = xs_ToFixed(fracbits, iscale); diff --git a/src/r_things.cpp b/src/r_things.cpp index 22538bd40..2abcc0e12 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,6 +98,7 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) 
EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); +EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, @@ -132,7 +133,7 @@ EXTERN_CVAR (Bool, r_drawvoxels) // int OffscreenBufferWidth, OffscreenBufferHeight; -canvas_pixel_t *OffscreenColorBuffer; +BYTE *OffscreenColorBuffer; FCoverageBuffer *OffscreenCoverageBuffer; // @@ -244,6 +245,7 @@ bool sprflipvert; void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) { + int pixelsize = r_swtruecolor ? 4 : 1; const fixed_t centeryfrac = FLOAT2FIXED(CenterY); const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) @@ -314,7 +316,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) } } dc_source = column + top; - dc_dest = ylookup[dc_yl] + dc_x + dc_destorg; + dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; colfunc (); } @@ -688,6 +690,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Blend the voxel, if that's what we need to do. if ((flags & ~DVF_MIRRORED) != 0) { + int pixelsize = r_swtruecolor ? 
4 : 1; for (int x = 0; x < viewwidth; ++x) { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) @@ -702,7 +705,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop dc_yl = span->Start; dc_yh = span->Stop - 1; dc_count = span->Stop - span->Start; - dc_dest = ylookup[span->Start] + x + dc_destorg; + dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; colfunc(); } else @@ -2602,7 +2605,7 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + BYTE *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2668,7 +2671,7 @@ void R_DrawParticle_C (vissprite_t *vis) void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; - canvas_pixel_t *dest; + uint32_t *dest; BYTE color = vis->Style.colormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2698,7 +2701,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) dc_x = x; if (R_ClipSpriteColumnWithPortals(vis)) continue; - dest = ylookup[yl] + x + dc_destorg; + dest = ylookup[yl] + x + (uint32_t*)dc_destorg; for (int y = 0; y < ycount; y++) { uint32_t bg_red = (*dest >> 16) & 0xff; @@ -2759,6 +2762,8 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, R_SetupDrawSlab(colormap); + int pixelsize = r_swtruecolor ? 4 : 1; + // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); i = DivScale6(i, MIN(daxscale, dayscale)); @@ -3012,7 +3017,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, ylookup[z1] + lxt + xxl + dc_destorg); + R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); } else { @@ -3243,12 +3248,12 @@ void R_CheckOffscreenBuffer(int width, int height, bool spansonly) { if (OffscreenColorBuffer == NULL) { - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } else if (OffscreenBufferWidth != width || OffscreenBufferHeight != height) { delete[] OffscreenColorBuffer; - OffscreenColorBuffer = new canvas_pixel_t[width * height]; + OffscreenColorBuffer = new BYTE[width * height * 4]; } } OffscreenBufferWidth = width; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 984375f25..ed6571ad3 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -179,7 +179,7 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) fixedcolormap = dc_colormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - canvas_pixel_t *destorgsave = dc_destorg; + BYTE *destorgsave = dc_destorg; dc_destorg = screen->GetBuffer(); if (dc_destorg == NULL) { @@ -1021,7 +1021,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (r_swtruecolor) { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -1040,7 +1040,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) } else { - canvas_pixel_t *spot = GetBuffer() + oldyyshifted + xx; + BYTE *spot = GetBuffer() + oldyyshifted + xx; DWORD *bg2rgb = Col2RGB8[1+level]; DWORD *fg2rgb = Col2RGB8[63-level]; DWORD fg = fg2rgb[basecolor]; @@ -1091,27 +1091,62 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - memset (GetBuffer() + y0*GetPitch() + x0, palColor, 
deltaX+1); + if (r_swtruecolor) + { + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + for (int i = 0; i <= deltaX; i++) + spot[i] = palColor; + } + else + { + memset (GetBuffer() + y0*GetPitch() + x0, palColor, deltaX+1); + } } else if (deltaX == 0) { // vertical line - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int pitch = GetPitch (); - do + if (r_swtruecolor) { - *spot = palColor; - spot += pitch; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int pitch = GetPitch(); + do + { + *spot = palColor; + spot += pitch; + } while (--deltaY != 0); + } } else if (deltaX == deltaY) { // diagonal line. - canvas_pixel_t *spot = GetBuffer() + y0*GetPitch() + x0; - int advance = GetPitch() + xDir; - do + if (r_swtruecolor) { - *spot = palColor; - spot += advance; - } while (--deltaY != 0); + uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } + else + { + BYTE *spot = GetBuffer() + y0*GetPitch() + x0; + int advance = GetPitch() + xDir; + do + { + *spot = palColor; + spot += advance; + } while (--deltaY != 0); + } } else { @@ -1231,7 +1266,6 @@ void DCanvas::DrawPixel(int x, int y, int palColor, uint32 realcolor) void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uint32 color) { int x, y; - canvas_pixel_t *dest; if (left == right || top == bottom) { @@ -1261,12 +1295,26 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - dest = Buffer + top * Pitch + left; - x = right - left; - for (y = top; y < bottom; y++) + if (r_swtruecolor) { - memset(dest, palcolor, x); - dest += Pitch; + uint32_t *dest = (uint32_t*)Buffer + top * Pitch + 
left; + x = right - left; + for (y = top; y < bottom; y++) + { + for (int i = 0; i < x; i++) + dest[i] = palcolor; + dest += Pitch; + } + } + else + { + BYTE *dest = Buffer + top * Pitch + left; + x = right - left; + for (y = top; y < bottom; y++) + { + memset(dest, palcolor, x); + dest += Pitch; + } } } @@ -1452,11 +1500,14 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // V_DrawBlock // Draw a linear block of pixels into the view buffer. // -void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pixel_t *src) const +void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { + if (r_swtruecolor) + return; + int srcpitch = _width; int destpitch; - canvas_pixel_t *dest; + BYTE *dest; if (ClipBox (x, y, _width, _height, src, srcpitch)) { @@ -1468,7 +1519,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix do { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += srcpitch; dest += destpitch; } while (--_height); @@ -1478,9 +1529,12 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const canvas_pix // V_GetBlock // Gets a linear block of pixels from the view buffer. // -void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *dest) const +void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - const canvas_pixel_t *src; + if (r_swtruecolor) + return; + + const BYTE *src; #ifdef RANGECHECK if (x<0 @@ -1496,14 +1550,14 @@ void DCanvas::GetBlock (int x, int y, int _width, int _height, canvas_pixel_t *d while (_height--) { - memcpy (dest, src, _width * sizeof(canvas_pixel_t)); + memcpy (dest, src, _width); src += Pitch; dest += _width; } } // Returns true if the box was completely clipped. False otherwise. 
-bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const canvas_pixel_t *&src, const int srcpitch) const +bool DCanvas::ClipBox (int &x, int &y, int &w, int &h, const BYTE *&src, const int srcpitch) const { if (x >= Width || y >= Height || x+w <= 0 || y+h <= 0) { // Completely clipped off screen diff --git a/src/v_video.cpp b/src/v_video.cpp index 2fb46e88a..01043b8bc 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -345,7 +345,6 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; int gap; - canvas_pixel_t *spot; int x, y; if (x1 >= Width || y1 >= Height) @@ -365,11 +364,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) return; } - spot = Buffer + x1 + y1*Pitch; gap = Pitch - w; if (r_swtruecolor) { + uint32_t *spot = (uint32_t*)Buffer + x1 + y1*Pitch; + uint32_t fg = color.d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -402,6 +402,8 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) } else { + BYTE *spot = Buffer + x1 + y1*Pitch; + DWORD *bg2rgb; DWORD fg; @@ -441,12 +443,12 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) // //========================================================================== -void DCanvas::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = SS_PAL; + color_type = r_swtruecolor ? 
SS_BGRA : SS_PAL; } //========================================================================== @@ -797,8 +799,8 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new canvas_pixel_t[Pitch * height]; - memset (MemBuffer, 0, Pitch * height * sizeof(canvas_pixel_t)); + MemBuffer = new BYTE[Pitch * height * 4]; + memset (MemBuffer, 0, Pitch * height * 4); } //========================================================================== @@ -917,7 +919,7 @@ void DFrameBuffer::DrawRateStuff () { int i = I_GetTime(false); int tics = i - LastTic; - canvas_pixel_t *buffer = GetBuffer(); + BYTE *buffer = GetBuffer(); LastTic = i; if (tics > 20) tics = 20; @@ -925,10 +927,21 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - buffer += (GetHeight()-1) * GetPitch(); - - for (i = 0; i < tics*2; i += 2) buffer[i] = 0xff; - for ( ; i < 20*2; i += 2) buffer[i] = 0x00; + if (r_swtruecolor) + { + uint32_t *buffer32 = (uint32_t*)buffer; + buffer32 += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer32[i] = 0xffffffff; + for (; i < 20 * 2; i += 2) buffer32[i] = 0xff000000; + } + else + { + buffer += (GetHeight() - 1) * GetPitch(); + + for (i = 0; i < tics * 2; i += 2) buffer[i] = 0xff; + for (; i < 20 * 2; i += 2) buffer[i] = 0x00; + } } else { diff --git a/src/v_video.h b/src/v_video.h index 27c09ee36..fa1ce83df 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -189,7 +189,7 @@ public: virtual ~DCanvas (); // Member variable access - inline canvas_pixel_t *GetBuffer () const { return Buffer; } + inline BYTE *GetBuffer () const { return Buffer; } inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } @@ -202,10 +202,10 @@ public: virtual bool IsLocked () { return Buffer != NULL; } // Returns true if the surface is locked // Draw a 
linear block of pixels into the canvas - virtual void DrawBlock (int x, int y, int width, int height, const canvas_pixel_t *src) const; + virtual void DrawBlock (int x, int y, int width, int height, const BYTE *src) const; // Reads a linear block of pixels into the view buffer. - virtual void GetBlock (int x, int y, int width, int height, canvas_pixel_t *dest) const; + virtual void GetBlock (int x, int y, int width, int height, BYTE *dest) const; // Dim the entire canvas for the menus virtual void Dim (PalEntry color = 0); @@ -237,7 +237,7 @@ public: // Retrieves a buffer containing image data for a screenshot. // Hint: Pitch can be negative for upside-down images, in which case buffer // points to the last row in the buffer, which will be the first row output. - virtual void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + virtual void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); // Releases the screenshot buffer. virtual void ReleaseScreenshotBuffer(); @@ -262,13 +262,13 @@ public: void DrawChar (FFont *font, int normalcolor, int x, int y, BYTE character, int tag_first, ...); protected: - canvas_pixel_t *Buffer; + BYTE *Buffer; int Width; int Height; int Pitch; int LockCount; - bool ClipBox (int &left, int &top, int &width, int &height, const canvas_pixel_t *&src, const int srcpitch) const; + bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; virtual void DrawTextureParms(FTexture *img, DrawParms &parms); bool ParseDrawTextureTags (FTexture *img, double x, double y, uint32 tag, va_list tags, DrawParms *parms, bool fortext) const; @@ -297,7 +297,7 @@ public: void Unlock (); protected: - canvas_pixel_t *MemBuffer; + BYTE *MemBuffer; DSimpleCanvas() {} }; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cc9045ee..0cd847b97 100644 --- 
a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -1316,7 +1316,7 @@ void D3DFB::Draw3DPart(bool copy3d) else { uint32_t *dest = (uint32_t *)lockrect.pBits; - uint32_t *src = MemBuffer; + uint32_t *src = (uint32_t*)MemBuffer; for (int y = 0; y < Height; y++) { memcpy(dest, src, Width * sizeof(uint32_t)); @@ -1744,7 +1744,7 @@ void D3DFB::SetBlendingRect(int x1, int y1, int x2, int y2) // //========================================================================== -void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type) +void D3DFB::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type) { D3DLOCKED_RECT lrect; @@ -1770,7 +1770,7 @@ void D3DFB::GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSTy } else { - buffer = (const canvas_pixel_t *)lrect.pBits; + buffer = (const BYTE *)lrect.pBits; pitch = lrect.Pitch; color_type = SS_BGRA; } diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index 9be571f98..fbdf035a3 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -999,8 +999,8 @@ DDrawFB::LockSurfRes DDrawFB::LockSurf (LPRECT lockrect, LPDIRECTDRAWSURFACE toL LOG1 ("Final result after restoration attempts: %08lx\n", hr); return NoGood; } - Buffer = (canvas_pixel_t *)desc.lpSurface; - Pitch = desc.lPitch / sizeof(canvas_pixel_t); + Buffer = (BYTE *)desc.lpSurface; + Pitch = desc.lPitch; BufferingNow = false; return wasLost ? 
GoodWasLost : Good; } diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index d26765100..0b3333d63 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -252,7 +252,7 @@ public: bool PaintToWindow (); void SetVSync (bool vsync); void NewRefreshRate(); - void GetScreenshotBuffer(const canvas_pixel_t *&buffer, int &pitch, ESSType &color_type); + void GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &color_type); void ReleaseScreenshotBuffer(); void SetBlendingRect (int x1, int y1, int x2, int y2); bool Begin2D (bool copy3d); From 05220a713320b0b14525baad647f8ace577e19ee Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 31 May 2016 09:36:18 +0200 Subject: [PATCH 006/100] Added IsBgra() to DCanvas Changed SWRender output format to be decided by IsBgra() --- src/f_wipe.cpp | 10 ++++----- src/posix/cocoa/i_video.mm | 10 +++++---- src/posix/hardware.h | 2 +- src/posix/sdl/hardware.cpp | 5 ++++- src/posix/sdl/sdlvideo.cpp | 6 +++--- src/posix/sdl/sdlvideo.h | 2 +- src/r_draw.cpp | 39 +++++++++++++++++++++++++++++++++- src/r_drawt.cpp | 2 -- src/r_main.cpp | 28 ++++++++++++++---------- src/r_main.h | 2 ++ src/r_segs.cpp | 2 -- src/r_swrenderer.cpp | 12 ++++++----- src/r_things.cpp | 1 - src/textures/canvastexture.cpp | 2 +- src/v_draw.cpp | 16 ++++++-------- src/v_video.cpp | 26 +++++++++++------------ src/v_video.h | 8 ++++--- src/win32/fb_d3d9.cpp | 17 +++++++-------- src/win32/fb_ddraw.cpp | 7 +----- src/win32/hardware.cpp | 5 ++++- src/win32/hardware.h | 2 +- src/win32/win32iface.h | 8 +++---- src/win32/win32video.cpp | 21 ++++++++++-------- wadsrc/static/menudef.txt | 2 +- 24 files changed, 139 insertions(+), 96 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 7e1ec678e..84b6036e4 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -33,8 +33,6 @@ // SCREEN WIPE PACKAGE // -EXTERN_CVAR(Bool, r_swtruecolor) - static int CurrentWipeType; static short *wipe_scr_start; @@ -385,7 +383,7 @@ static bool 
(*wipes[])(int) = // Returns true if the wipe should be performed. bool wipe_StartScreen (int type) { - if (r_swtruecolor) + if (screen->IsBgra()) return false; CurrentWipeType = clamp(type, 0, wipe_NUMWIPES - 1); @@ -401,7 +399,7 @@ bool wipe_StartScreen (int type) void wipe_EndScreen (void) { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (CurrentWipeType) @@ -420,7 +418,7 @@ bool wipe_ScreenWipe (int ticks) { bool rc; - if (r_swtruecolor) + if (screen->IsBgra()) return true; if (CurrentWipeType == wipe_None) @@ -436,7 +434,7 @@ bool wipe_ScreenWipe (int ticks) // Final things for the wipe void wipe_Cleanup() { - if (r_swtruecolor) + if (screen->IsBgra()) return; if (wipe_scr_start != NULL) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 5e073daf3..c97460a02 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,6 +96,8 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, vid_hidpi) +CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) + CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { extern int NewWidth, NewHeight, NewBits, DisplayBits; @@ -199,7 +201,7 @@ public: virtual EDisplayType GetDisplayType() { return DISPLAY_Both; } virtual void SetWindowedScale(float scale); - virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer* old); + virtual DFrameBuffer* CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer* old); virtual void StartModeIterator(int bits, bool fullscreen); virtual bool NextMode(int* width, int* height, bool* letterbox); @@ -518,7 +520,7 @@ bool CocoaVideo::NextMode(int* const width, int* const height, bool* const lette return false; } -DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool fullscreen, DFrameBuffer* const old) +DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, const bool bgra, const bool fullscreen, DFrameBuffer* 
const old) { PalEntry flashColor = 0; int flashAmount = 0; @@ -762,7 +764,7 @@ CocoaVideo* CocoaVideo::GetInstance() CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height) +: DFrameBuffer(width, height, false) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -1064,7 +1066,7 @@ void I_CreateRenderer() DFrameBuffer* I_SetMode(int &width, int &height, DFrameBuffer* old) { - return Video->CreateFrameBuffer(width, height, fullscreen, old); + return Video->CreateFrameBuffer(width, height, swtruecolor, fullscreen, old); } bool I_CheckResolution(const int width, const int height, const int bits) diff --git a/src/posix/hardware.h b/src/posix/hardware.h index 618941fe5..3c06cb6c6 100644 --- a/src/posix/hardware.h +++ b/src/posix/hardware.h @@ -74,7 +74,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 6142eb1d8..52bca35e7 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) IVideo *Video; @@ -128,7 +129,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) fs = fullscreen; break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -280,6 +281,8 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, 
CVAR_ARCHIVE | CVAR_GLOBALCONFIG) } } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 04c3a3f2e..b050097be 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -257,7 +257,7 @@ bool SDLVideo::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -335,7 +335,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool fullscree } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; @@ -351,7 +351,7 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height) + : DFrameBuffer (width, height, false) { int i; diff --git a/src/posix/sdl/sdlvideo.h b/src/posix/sdl/sdlvideo.h index 072167b5a..385733bc1 100644 --- a/src/posix/sdl/sdlvideo.h +++ b/src/posix/sdl/sdlvideo.h @@ -10,7 +10,7 @@ class SDLVideo : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f939406bb..82169ec6f 100644 
--- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -179,7 +179,6 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; EXTERN_CVAR (Int, r_columnmethod) -EXTERN_CVAR (Bool, r_swtruecolor) void R_InitShadeMaps() { @@ -4129,6 +4128,14 @@ const BYTE *R_GetColumn (FTexture *tex, int col) // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () { + // Save a copy when switching to true color mode as the assembly palette drawers might change them + static bool pointers_saved = false; + static DWORD(*dovline1_saved)(); + static DWORD(*doprevline1_saved)(); + static DWORD(*domvline1_saved)(); + static void(*dovline4_saved)(); + static void(*domvline4_saved)(); + if (r_swtruecolor) { R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; @@ -4201,6 +4208,16 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; dovline4 = vlinec4_RGBA; @@ -4304,7 +4321,27 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + + if (pointers_saved) + { + pointers_saved = false; + dovline1 = dovline1_saved; + doprevline1 = doprevline1_saved; + domvline1 = domvline1_saved; + dovline4 = dovline4_saved; + domvline4 = domvline4_saved; + } } + + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + + // [RH] Horizontal column drawers + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; } // [RH] Choose column drawers in a single place diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 485ed7ab3..e47590c72 100644 
--- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -47,8 +47,6 @@ #include "r_things.h" #include "v_video.h" -EXTERN_CVAR(Bool, r_swtruecolor) - // I should have commented this stuff better. // // dc_temp is the buffer R_DrawColumnHoriz writes into. diff --git a/src/r_main.cpp b/src/r_main.cpp index 9dc61eea3..aec8310d5 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -103,7 +103,8 @@ bool r_dontmaplines; CVAR (String, r_viewsize, "", CVAR_NOSET) CVAR (Bool, r_shadercolormaps, true, CVAR_ARCHIVE) -CVAR (Bool, r_swtruecolor, false, CVAR_ARCHIVE) + +bool r_swtruecolor; double r_BaseVisibility; double r_WallVisibility; @@ -398,16 +399,6 @@ void R_InitRenderer() R_InitPlanes (); R_InitShadeMaps(); R_InitColumnDrawers (); - - colfunc = basecolfunc = R_DrawColumn; - fuzzcolfunc = R_DrawFuzzColumn; - transcolfunc = R_DrawTranslatedColumn; - spanfunc = R_DrawSpan; - - // [RH] Horizontal column drawers - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } //========================================================================== @@ -962,6 +953,13 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, int x, int y, int width, int height, bool dontmaplines) { const bool savedviewactive = viewactive; + const bool savedoutputformat = r_swtruecolor; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } viewwidth = width; RenderTarget = canvas; @@ -980,7 +978,15 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, screen->Lock (true); R_SetupBuffer (); screen->Unlock (); + viewactive = savedviewactive; + r_swtruecolor = savedoutputformat; + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } } //========================================================================== diff --git a/src/r_main.h b/src/r_main.h index c1034ea3e..765635e5d 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -106,6 +106,8 @@ 
inline uint32_t shade_pal_index(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +extern bool r_swtruecolor; + extern double GlobVis; void R_SetVisibility(double visibility); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 8c71f0fb7..cab97adfc 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -61,8 +61,6 @@ CVAR(Bool, r_np2, true, 0) //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) -EXTERN_CVAR(Bool, r_swtruecolor) - #define HEIGHTBITS 12 #define HEIGHTSHIFT (FRACBITS-HEIGHTBITS) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 433007acb..15e2fda8f 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -155,6 +155,12 @@ void FSoftwareRenderer::Precache(BYTE *texhitlist, TMap &act void FSoftwareRenderer::RenderView(player_t *player) { + if (r_swtruecolor != screen->IsBgra()) + { + r_swtruecolor = screen->IsBgra(); + R_InitColumnDrawers(); + } + R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); @@ -182,8 +188,7 @@ void FSoftwareRenderer::RemapVoxels() void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, int height) { -#ifdef PALETTEOUTPUT - DCanvas *pic = new DSimpleCanvas (width, height); + DCanvas *pic = new DSimpleCanvas (width, height, false); PalEntry palette[256]; // Take a snapshot of the player's view @@ -196,7 +201,6 @@ void FSoftwareRenderer::WriteSavePic (player_t *player, FILE *file, int width, i pic->Destroy(); pic->ObjectFlags |= OF_YesReallyDelete; delete pic; -#endif } //=========================================================================== @@ -313,7 +317,6 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { -#ifdef PALETTEOUTPUT BYTE *Pixels = const_cast(tex->GetPixels()); DSimpleCanvas *Canvas = tex->GetCanvas(); @@ -337,7 +340,6 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin tex->SetUpdated(); fixedcolormap = savecolormap; realfixedcolormap = savecm; -#endif } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index 2abcc0e12..f52c80376 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -98,7 +98,6 @@ EXTERN_CVAR (Bool, st_scale) EXTERN_CVAR(Bool, r_shadercolormaps) EXTERN_CVAR(Int, r_drawfuzz) EXTERN_CVAR(Bool, r_deathcamera); -EXTERN_CVAR(Bool, r_swtruecolor) // // Sprite rotation 0 is facing the viewer, diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index d1f70439f..7242149a4 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -103,7 +103,7 @@ const BYTE *FCanvasTexture::GetPixels () void FCanvasTexture::MakeTexture () { - Canvas = new DSimpleCanvas (Width, Height); + Canvas = new DSimpleCanvas (Width, Height, false); Canvas->Lock (); GC::AddSoftRoot(Canvas); diff --git a/src/v_draw.cpp 
b/src/v_draw.cpp index ed6571ad3..b4f1ad4b5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -77,8 +77,6 @@ extern "C" short spanend[MAXHEIGHT]; CVAR (Bool, hud_scale, false, CVAR_ARCHIVE); -EXTERN_CVAR(Bool, r_swtruecolor) - // For routines that take RGB colors, cache the previous lookup in case there // are several repetitions with the same color. static int LastPal = -1; @@ -1019,7 +1017,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) oldyyshifted = yy * GetPitch(); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; @@ -1091,7 +1089,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { swapvalues (x0, x1); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) @@ -1104,7 +1102,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == 0) { // vertical line - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); @@ -1127,7 +1125,7 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } else if (deltaX == deltaY) { // diagonal line. 
- if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; @@ -1295,7 +1293,7 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin palcolor = PalFromRGB(color); } - if (r_swtruecolor) + if (IsBgra()) { uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; @@ -1502,7 +1500,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) const { - if (r_swtruecolor) + if (IsBgra()) return; int srcpitch = _width; @@ -1531,7 +1529,7 @@ void DCanvas::DrawBlock (int x, int y, int _width, int _height, const BYTE *src) // void DCanvas::GetBlock (int x, int y, int _width, int _height, BYTE *dest) const { - if (r_swtruecolor) + if (IsBgra()) return; const BYTE *src; diff --git a/src/v_video.cpp b/src/v_video.cpp index 01043b8bc..bc99edbf1 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -65,8 +65,6 @@ #include "menu/menu.h" #include "r_data/voxels.h" -EXTERN_CVAR(Bool, r_swtruecolor) - FRenderer *Renderer; IMPLEMENT_ABSTRACT_CLASS (DCanvas) @@ -83,7 +81,7 @@ class DDummyFrameBuffer : public DFrameBuffer DECLARE_CLASS (DDummyFrameBuffer, DFrameBuffer); public: DDummyFrameBuffer (int width, int height) - : DFrameBuffer (0, 0) + : DFrameBuffer (0, 0, false) { Width = width; Height = height; @@ -208,13 +206,14 @@ DCanvas *DCanvas::CanvasChain = NULL; // //========================================================================== -DCanvas::DCanvas (int _width, int _height) +DCanvas::DCanvas (int _width, int _height, bool _bgra) { // Init member vars Buffer = NULL; LockCount = 0; Width = _width; Height = _height; + Bgra = _bgra; // Add to list of active canvases Next = CanvasChain; @@ -366,7 +365,7 @@ void DCanvas::Dim (PalEntry color, float damount, int x1, int y1, int w, int h) gap = Pitch - w; - if (r_swtruecolor) + if (IsBgra()) { uint32_t *spot = 
(uint32_t*)Buffer + x1 + y1*Pitch; @@ -448,7 +447,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo Lock(true); buffer = GetBuffer(); pitch = GetPitch(); - color_type = r_swtruecolor ? SS_BGRA : SS_PAL; + color_type = IsBgra() ? SS_BGRA : SS_PAL; } //========================================================================== @@ -761,8 +760,8 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // //========================================================================== -DSimpleCanvas::DSimpleCanvas (int width, int height) - : DCanvas (width, height) +DSimpleCanvas::DSimpleCanvas (int width, int height, bool bgra) + : DCanvas (width, height, bgra) { // Making the pitch a power of 2 is very bad for performance // Try to maximize the number of cache lines that can be filled @@ -799,8 +798,9 @@ DSimpleCanvas::DSimpleCanvas (int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - MemBuffer = new BYTE[Pitch * height * 4]; - memset (MemBuffer, 0, Pitch * height * 4); + int bytes_per_pixel = bgra ? 
4 : 1; + MemBuffer = new BYTE[Pitch * height * bytes_per_pixel]; + memset (MemBuffer, 0, Pitch * height * bytes_per_pixel); } //========================================================================== @@ -869,8 +869,8 @@ void DSimpleCanvas::Unlock () // //========================================================================== -DFrameBuffer::DFrameBuffer (int width, int height) - : DSimpleCanvas (width, height) +DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) + : DSimpleCanvas (width, height, bgra) { LastMS = LastSec = FrameCount = LastCount = LastTic = 0; Accel2D = false; @@ -927,7 +927,7 @@ void DFrameBuffer::DrawRateStuff () // Buffer can be NULL if we're doing hardware accelerated 2D if (buffer != NULL) { - if (r_swtruecolor) + if (IsBgra()) { uint32_t *buffer32 = (uint32_t*)buffer; buffer32 += (GetHeight() - 1) * GetPitch(); diff --git a/src/v_video.h b/src/v_video.h index fa1ce83df..120beff9a 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -185,7 +185,7 @@ class DCanvas : public DObject { DECLARE_ABSTRACT_CLASS (DCanvas, DObject) public: - DCanvas (int width, int height); + DCanvas (int width, int height, bool bgra); virtual ~DCanvas (); // Member variable access @@ -193,6 +193,7 @@ public: inline int GetWidth () const { return Width; } inline int GetHeight () const { return Height; } inline int GetPitch () const { return Pitch; } + inline bool IsBgra() const { return Bgra; } virtual bool IsValid (); @@ -267,6 +268,7 @@ protected: int Height; int Pitch; int LockCount; + bool Bgra; bool ClipBox (int &left, int &top, int &width, int &height, const BYTE *&src, const int srcpitch) const; void DrawTextureV(FTexture *img, double x, double y, uint32 tag, va_list tags) = delete; @@ -289,7 +291,7 @@ class DSimpleCanvas : public DCanvas { DECLARE_CLASS (DSimpleCanvas, DCanvas) public: - DSimpleCanvas (int width, int height); + DSimpleCanvas (int width, int height, bool bgra); ~DSimpleCanvas (); bool IsValid (); @@ -327,7 +329,7 @@ class DFrameBuffer 
: public DSimpleCanvas { DECLARE_ABSTRACT_CLASS (DFrameBuffer, DSimpleCanvas) public: - DFrameBuffer (int width, int height); + DFrameBuffer (int width, int height, bool bgra); // Force the surface to use buffered output if true is passed. virtual bool Lock (bool buffered) = 0; diff --git a/src/win32/fb_d3d9.cpp b/src/win32/fb_d3d9.cpp index 0cd847b97..fd84e3bbb 100644 --- a/src/win32/fb_d3d9.cpp +++ b/src/win32/fb_d3d9.cpp @@ -187,7 +187,6 @@ EXTERN_CVAR (Float, Gamma) EXTERN_CVAR (Bool, vid_vsync) EXTERN_CVAR (Float, transsouls) EXTERN_CVAR (Int, vid_refreshrate) -EXTERN_CVAR (Bool, r_swtruecolor) extern IDirect3D9 *D3D; @@ -243,8 +242,8 @@ CVAR(Bool, vid_hwaalines, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) // //========================================================================== -D3DFB::D3DFB (UINT adapter, int width, int height, bool fullscreen) - : BaseWinFB (width, height) +D3DFB::D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen) + : BaseWinFB (width, height, bgra) { D3DPRESENT_PARAMETERS d3dpp; @@ -766,7 +765,7 @@ void D3DFB::KillNativeTexs() bool D3DFB::CreateFBTexture () { - FBFormat = r_swtruecolor ? D3DFMT_A8R8G8B8 : D3DFMT_L8; + FBFormat = IsBgra() ? 
D3DFMT_A8R8G8B8 : D3DFMT_L8; if (FAILED(D3DDevice->CreateTexture(Width, Height, 1, D3DUSAGE_DYNAMIC, FBFormat, D3DPOOL_DEFAULT, &FBTexture, NULL))) { @@ -1307,7 +1306,7 @@ void D3DFB::Draw3DPart(bool copy3d) SUCCEEDED(FBTexture->LockRect (0, &lockrect, NULL, D3DLOCK_DISCARD))) || SUCCEEDED(FBTexture->LockRect (0, &lockrect, &texrect, 0))) { - if (r_swtruecolor && FBFormat == D3DFMT_A8R8G8B8) + if (IsBgra() && FBFormat == D3DFMT_A8R8G8B8) { if (lockrect.Pitch == Pitch * sizeof(uint32_t) && Pitch == Width) { @@ -1325,7 +1324,7 @@ void D3DFB::Draw3DPart(bool copy3d) } } } - else if (!r_swtruecolor && FBFormat == D3DFMT_L8) + else if (!IsBgra() && FBFormat == D3DFMT_L8) { if (lockrect.Pitch == Pitch && Pitch == Width) { @@ -1377,7 +1376,7 @@ void D3DFB::Draw3DPart(bool copy3d) memset(Constant, 0, sizeof(Constant)); SetAlphaBlend(D3DBLENDOP(0)); EnableAlphaTest(FALSE); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); @@ -1398,7 +1397,7 @@ void D3DFB::Draw3DPart(bool copy3d) realfixedcolormap->ColorizeStart[1]/2, realfixedcolormap->ColorizeStart[2]/2, 0); color1 = D3DCOLOR_COLORVALUE(realfixedcolormap->ColorizeEnd[0]/2, realfixedcolormap->ColorizeEnd[1]/2, realfixedcolormap->ColorizeEnd[2]/2, 1); - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_SpecialColormap]); else SetPixelShader(Shaders[SHADER_SpecialColormapPal]); @@ -1412,7 +1411,7 @@ void D3DFB::Draw3DPart(bool copy3d) CalcFullscreenCoords(verts, Accel2D, false, color0, color1); D3DDevice->DrawPrimitiveUP(D3DPT_TRIANGLEFAN, 2, verts, sizeof(FBVERTEX)); } - if (r_swtruecolor) + if (IsBgra()) SetPixelShader(Shaders[SHADER_NormalColor]); else SetPixelShader(Shaders[SHADER_NormalColorPal]); diff --git a/src/win32/fb_ddraw.cpp b/src/win32/fb_ddraw.cpp index fbdf035a3..5637e9695 100644 --- a/src/win32/fb_ddraw.cpp +++ b/src/win32/fb_ddraw.cpp @@ -60,9 +60,7 @@ // TYPES 
------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW IMPLEMENT_CLASS(DDrawFB) -#endif // EXTERNAL FUNCTION PROTOTYPES -------------------------------------------- @@ -120,10 +118,8 @@ cycle_t BlitCycles; // CODE -------------------------------------------------------------------- -#ifdef USE_OBSOLETE_DDRAW - DDrawFB::DDrawFB (int width, int height, bool fullscreen) - : BaseWinFB (width, height) + : BaseWinFB (width, height, false) { int i; @@ -1330,7 +1326,6 @@ void DDrawFB::Blank () PrimarySurf->Blt (NULL, NULL, NULL, DDBLT_COLORFILL, &blitFX); } } -#endif ADD_STAT (blit) { diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8cc770556..8856924c0 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -51,6 +51,7 @@ EXTERN_CVAR (Bool, ticker) EXTERN_CVAR (Bool, fullscreen) +EXTERN_CVAR (Bool, swtruecolor) EXTERN_CVAR (Float, vid_winscale) CVAR(Int, win_x, -1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) @@ -146,7 +147,7 @@ DFrameBuffer *I_SetMode (int &width, int &height, DFrameBuffer *old) } break; } - DFrameBuffer *res = Video->CreateFrameBuffer (width, height, fs, old); + DFrameBuffer *res = Video->CreateFrameBuffer (width, height, swtruecolor, fs, old); /* Right now, CreateFrameBuffer cannot return NULL if (res == NULL) @@ -310,6 +311,8 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } +CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) + extern int NewWidth, NewHeight, NewBits, DisplayBits; CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) diff --git a/src/win32/hardware.h b/src/win32/hardware.h index b2bafef32..184eeccf5 100644 --- a/src/win32/hardware.h +++ b/src/win32/hardware.h @@ -45,7 +45,7 @@ class IVideo virtual EDisplayType GetDisplayType () = 0; virtual void SetWindowedScale (float scale) = 0; - virtual DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old) = 0; + virtual DFrameBuffer 
*CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old) = 0; virtual void StartModeIterator (int bits, bool fs) = 0; virtual bool NextMode (int *width, int *height, bool *letterbox) = 0; diff --git a/src/win32/win32iface.h b/src/win32/win32iface.h index 0b3333d63..d30475eb3 100644 --- a/src/win32/win32iface.h +++ b/src/win32/win32iface.h @@ -70,7 +70,7 @@ class Win32Video : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); @@ -121,7 +121,7 @@ class BaseWinFB : public DFrameBuffer { DECLARE_ABSTRACT_CLASS(BaseWinFB, DFrameBuffer) public: - BaseWinFB (int width, int height) : DFrameBuffer (width, height), Windowed (true) {} + BaseWinFB (int width, int height, bool bgra) : DFrameBuffer (width, height, bgra), Windowed (true) {} bool IsFullscreen () { return !Windowed; } virtual void Blank () = 0; @@ -142,7 +142,6 @@ protected: BaseWinFB() {} }; -#ifdef USE_OBSOLETE_DDRAW class DDrawFB : public BaseWinFB { DECLARE_CLASS(DDrawFB, BaseWinFB) @@ -224,13 +223,12 @@ private: DDrawFB() {} }; -#endif class D3DFB : public BaseWinFB { DECLARE_CLASS(D3DFB, BaseWinFB) public: - D3DFB (UINT adapter, int width, int height, bool fullscreen); + D3DFB (UINT adapter, int width, int height, bool bgra, bool fullscreen); ~D3DFB (); bool IsValid (); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 3f3645d0b..5b2d5ef20 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -222,6 +222,13 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. 
FreeModes (); #ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) + + AddMode(320, 200, 8, 200, 0); + AddMode(320, 240, 8, 240, 0); + AddMode(640, 480, 8, 480, 0); + AddMode(800, 600, 8, 600, 0); + AddMode(1024, 768, 8, 768, 0); + AddMode(1920, 1080, 8, 1440, 0); // 1080p AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic @@ -636,7 +643,7 @@ bool Win32Video::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -652,7 +659,8 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr BaseWinFB *fb = static_cast (old); if (fb->Width == width && fb->Height == height && - fb->Windowed == !fullscreen) + fb->Windowed == !fullscreen && + fb->Bgra == bgra) { return old; } @@ -667,13 +675,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr flashAmount = 0; } -#ifndef USE_OBSOLETE_DDRAW - fb = new D3DFB(m_Adapter, width, height, fullscreen); - LOG1("New fb created @ %p\n", fb); -#else if (D3D != NULL) { - fb = new D3DFB (m_Adapter, width, height, fullscreen); + fb = new D3DFB (m_Adapter, width, height, bgra, fullscreen); } else { @@ -738,10 +742,9 @@ DFrameBuffer *Win32Video::CreateFrameBuffer (int width, int height, bool fullscr } ++retry; - fb = static_cast(CreateFrameBuffer (width, height, fullscreen, NULL)); + fb = static_cast(CreateFrameBuffer (width, height, bgra, fullscreen, NULL)); } retry = 0; -#endif fb->SetFlash (flashColor, flashAmount); return fb; diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 93e33ce79..3c712de96 100644 --- a/wadsrc/static/menudef.txt +++ 
b/wadsrc/static/menudef.txt @@ -661,7 +661,7 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_VSYNC", "vid_vsync", "OnOff" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" - Option "$DSPLYMNU_TRUECOLOR", "r_swtruecolor", "OnOff" + Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From e929eec80f688f2afbd4a27ade847282aad9622d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 05:28:14 +0200 Subject: [PATCH 007/100] Make x86 asm aware of swtruecolor --- src/doomtype.h | 5 + src/r_draw.cpp | 243 +++++++++++++++++++++++++++++++++------ src/r_draw.h | 1 + src/r_drawt.cpp | 25 ++-- src/r_plane.cpp | 41 ++++--- src/win32/win32video.cpp | 4 +- 6 files changed, 256 insertions(+), 63 deletions(-) diff --git a/src/doomtype.h b/src/doomtype.h index 39c59751d..9fca870d3 100644 --- a/src/doomtype.h +++ b/src/doomtype.h @@ -99,6 +99,11 @@ typedef TMap FClassMap; #endif +// Only use SSE intrinsics on Intel architecture +#if !defined(_M_IX86) && !defined(__i386__) && !defined(_M_X64) && !defined(__amd64__) +#define NO_SSE +#endif + #if defined(_MSC_VER) #define NOVTABLE __declspec(novtable) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 82169ec6f..d7b740973 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1789,7 +1789,7 @@ void R_SetSpanSource(const BYTE *pixels) { ds_source = pixels; #ifdef X86_ASM - if (ds_cursource != ds_source) + if (!r_swtruecolor && ds_cursource != ds_source) { R_SetSpanSource_ASM(pixels); } @@ -1809,7 +1809,7 @@ void R_SetSpanColormap(BYTE *colormap) ds_colormap = colormap; ds_light = 0; #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) { R_SetSpanColormap_ASM (ds_colormap); } @@ -1838,7 +1838,8 @@ void R_SetupSpanBits(FTexture *tex) ds_ybits--; } #ifdef X86_ASM - R_SetSpanSize_ASM (ds_xbits, ds_ybits); + if (!r_swtruecolor) + R_SetSpanSize_ASM 
(ds_xbits, ds_ybits); #endif } @@ -1954,7 +1955,80 @@ void R_DrawSpanP_RGBA_C() { // 64x64 is the most common case by far, so special case it. + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + *dest++ = shade_pal_index(colormap[source[spot]], light); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } +} + #ifndef NO_SSE +void R_DrawSpanP_RGBA_SSE() +{ + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + const BYTE* colormap = ds_colormap; + int count; + int spot; + +#ifdef RANGECHECK + if (ds_x2 < ds_x1 || ds_x1 < 0 + || ds_x2 >= screen->width || ds_y > screen->height) + { + I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); + } + // dscount++; +#endif + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + uint32_t light = calc_light_multiplier(ds_light); + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -2000,7 +2074,6 @@ void R_DrawSpanP_RGBA_C() } if (count == 0) return; -#endif do { @@ -2037,6 +2110,7 @@ void R_DrawSpanP_RGBA_C() } while (--count); } } +#endif #ifndef X86_ASM @@ -2971,6 +3045,12 @@ void (*domvline4)() = mvlineasm4; void setupvline (int fracbits) { + if (r_swtruecolor) + { + vlinebits = fracbits; + return; + } + #ifdef X86_ASM if (CPU.Family <= 5) { @@ -3075,23 +3155,43 @@ void vlinec4_RGBA() uint32_t *dest = (uint32_t*)dc_dest; int count = dc_count; int bits = vlinebits; + DWORD place; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + do + { + dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); +} + #ifndef NO_SSE +void vlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, 
light2); uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; -#endif do { -#ifndef NO_SSE - DWORD place0 = local_vplce[0]; DWORD place1 = local_vplce[1]; DWORD place2 = local_vplce[2]; @@ -3116,17 +3216,9 @@ void vlinec4_RGBA() fg_lo = _mm_srli_epi16(fg_lo, 8); fg = _mm_packus_epi16(fg_lo, fg_hi); _mm_storeu_si128((__m128i*)dest, fg); - -#else - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; -#endif dest += dc_pitch; } while (--count); -#ifndef NO_SSE // Is this needed? Global variables makes it tricky to know.. 
vplce[0] = local_vplce[0]; vplce[1] = local_vplce[1]; @@ -3136,18 +3228,25 @@ void vlinec4_RGBA() vince[1] = local_vince[1]; vince[2] = local_vince[2]; vince[3] = local_vince[3]; -#endif } +#endif void setupmvline (int fracbits) { + if (!r_swtruecolor) + { #if defined(X86_ASM) - setupmvlineasm (fracbits); - domvline1 = mvlineasm1; - domvline4 = mvlineasm4; + setupmvlineasm(fracbits); + domvline1 = mvlineasm1; + domvline4 = mvlineasm4; #else - mvlinebits = fracbits; + mvlinebits = fracbits; #endif + } + else + { + mvlinebits = fracbits; + } } #if !defined(X86_ASM) @@ -3247,6 +3346,73 @@ void mvlinec4_RGBA() } while (--count); } +#ifndef NO_SSE +void mvlinec4_RGBA_SSE() +{ + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + BYTE p0 = palookupoffse[0][pix0]; + BYTE p1 = palookupoffse[1][pix1]; + BYTE p2 = 
palookupoffse[2][pix2]; + BYTE p3 = palookupoffse[3][pix3]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); + fg_lo = _mm_srli_epi16(fg_lo, 8); + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + + // Is this needed? Global variables makes it tricky to know.. + vplce[0] = local_vplce[0]; + vplce[1] = local_vplce[1]; + vplce[2] = local_vplce[2]; + vplce[3] = local_vplce[3]; + vince[0] = local_vince[0]; + vince[1] = local_vince[1]; + vince[2] = local_vince[2]; + vince[3] = local_vince[3]; +} +#endif + extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -4138,14 +4304,28 @@ void R_InitColumnDrawers () if (r_swtruecolor) { + if (!pointers_saved) + { + pointers_saved = true; + dovline1_saved = dovline1; + doprevline1_saved = doprevline1; + domvline1_saved = domvline1; + dovline4_saved = dovline4; + domvline4_saved = domvline4; + } + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; R_DrawColumn = R_DrawColumnP_RGBA_C; R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; rt_map4cols = rt_map4cols_RGBA_c; +#ifndef NO_SSE + R_DrawSpan = R_DrawSpanP_RGBA_SSE; +#else + R_DrawSpan = R_DrawSpanP_RGBA_C; +#endif R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4208,21 +4388,18 @@ void 
R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - if (!pointers_saved) - { - pointers_saved = true; - dovline1_saved = dovline1; - doprevline1_saved = doprevline1; - domvline1_saved = domvline1; - dovline4_saved = dovline4; - domvline4_saved = domvline4; - } - dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; - dovline4 = vlinec4_RGBA; domvline1 = mvlinec1_RGBA; - domvline4 = mvlinec4_RGBA; + +#ifndef NO_SSE + dovline4 = vlinec4_RGBA_SSE; + domvline4 = mvlinec4_RGBA_SSE; +#else + dovline4 = vlinec4_RGBA; + domvline4 = mvlinec4_RGBA; +#endif + } else { diff --git a/src/r_draw.h b/src/r_draw.h index 2348914b6..d5007c885 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -269,6 +269,7 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); +void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index e47590c72..ca6862ed6 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -860,18 +860,21 @@ void rt_draw4cols (int sx) } #ifdef X86_ASM - // Setup assembly routines for changed colormaps or other parameters. - if (hcolfunc_post4 == rt_shaded4cols) + if (!r_swtruecolor) { - R_SetupShadedCol(); - } - else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) - { - R_SetupAddClampCol(); - } - else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) - { - R_SetupAddCol(); + // Setup assembly routines for changed colormaps or other parameters. 
+ if (hcolfunc_post4 == rt_shaded4cols) + { + R_SetupShadedCol(); + } + else if (hcolfunc_post4 == rt_addclamp4cols || hcolfunc_post4 == rt_tlateaddclamp4cols) + { + R_SetupAddClampCol(); + } + else if (hcolfunc_post4 == rt_add4cols || hcolfunc_post4 == rt_tlateadd4cols) + { + R_SetupAddCol(); + } } #endif diff --git a/src/r_plane.cpp b/src/r_plane.cpp index a71590c9d..40e14c020 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -231,7 +231,7 @@ void R_MapPlane (int y, int x1) } #ifdef X86_ASM - if (ds_colormap != ds_curcolormap) + if (!r_swtruecolor && ds_colormap != ds_curcolormap) R_SetSpanColormap_ASM (ds_colormap); #endif @@ -1620,7 +1620,7 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { #ifdef X86_ASM - if (ds_source != ds_cursource) + if (!r_swtruecolor && ds_source != ds_cursource) { R_SetSpanSource_ASM (ds_source); } @@ -1747,7 +1747,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1782,7 +1782,7 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI*3/2 - ViewAngle.Radians(); + ang = M_PI * 3 / 2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1793,25 +1793,25 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; -// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); -// VectorScale2 (m, 64.f/VectorLength(m)); + // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); + // VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI/2 - n[0] = -xscale * cos(ang); + ang += M_PI / 2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif -// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); -// VectorScale2 (n, 64.f/VectorLength(n)); + // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); + // VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1861,9 +1861,16 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t } #if defined(X86_ASM) - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM (ds_source); - R_MapVisPlane (pl, R_DrawTiltedPlane_ASM); + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); + } + else + { + R_MapVisPlane(pl, R_MapTiltedPlane); + } #else R_MapVisPlane (pl, R_MapTiltedPlane); #endif diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index 5b2d5ef20..a180a35ea 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -229,8 +229,8 @@ bool Win32Video::InitD3D9 () AddMode(800, 600, 8, 600, 0); AddMode(1024, 768, 8, 768, 0); - AddMode(1920, 1080, 8, 1440, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1440, 0); // 4k + AddMode(1920, 1080, 8, 1080, 0); // 1080p + AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k AddMode(2560, 1440, 8, 1440, 0); // 27" classic AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k #else From b9d7a98aeceac8987917db03c9eecce50b1b4abd Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 06:02:37 +0200 Subject: [PATCH 008/100] Change swtruecolor cvar to take effect immediately --- src/posix/cocoa/i_video.mm | 11 ++++++++++- src/posix/sdl/hardware.cpp | 12 ++++++++++-- src/win32/hardware.cpp | 12 ++++++++++-- src/win32/win32video.cpp | 14 -------------- 4 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c97460a02..c2eb58c6d 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -96,7 +96,16 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, 
vid_hidpi) -CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE) +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + extern int NewWidth, NewHeight, NewBits, DisplayBits; + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} CUSTOM_CVAR(Bool, fullscreen, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) { diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 52bca35e7..9de4d03a4 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -281,10 +281,18 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) } } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. + NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG) { NewWidth = screen->GetWidth(); diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index 8856924c0..49c970457 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -311,10 +311,18 @@ void I_RestoreWindowedPos () MoveWindow (Window, winx, winy, winw, winh, TRUE); } -CVAR (Bool, swtruecolor, false, CVAR_ARCHIVE) - extern int NewWidth, NewHeight, NewBits, DisplayBits; +CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +{ + // Strictly speaking this doesn't require a mode switch, but it is the easiest + // way to force a CreateFramebuffer call without a lot of refactoring. 
+ NewWidth = screen->GetWidth(); + NewHeight = screen->GetHeight(); + NewBits = DisplayBits; + setmodeneeded = true; +} + CUSTOM_CVAR (Bool, fullscreen, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { NewWidth = screen->GetWidth(); diff --git a/src/win32/win32video.cpp b/src/win32/win32video.cpp index a180a35ea..74b10ef07 100644 --- a/src/win32/win32video.cpp +++ b/src/win32/win32video.cpp @@ -221,22 +221,8 @@ bool Win32Video::InitD3D9 () // Enumerate available display modes. FreeModes (); -#ifndef PALETTEOUTPUT // To do: remove this again (AddD3DModes fails when there are too many modes available for videomenu to display) - - AddMode(320, 200, 8, 200, 0); - AddMode(320, 240, 8, 240, 0); - AddMode(640, 480, 8, 480, 0); - AddMode(800, 600, 8, 600, 0); - AddMode(1024, 768, 8, 768, 0); - - AddMode(1920, 1080, 8, 1080, 0); // 1080p - AddMode(1920*2, 1080*2, 8, 1080*2, 0); // 4k - AddMode(2560, 1440, 8, 1440, 0); // 27" classic - AddMode(2560*2, 1440*2, 8, 1440*2, 0); // 5k -#else AddD3DModes (m_Adapter, D3DFMT_X8R8G8B8); AddD3DModes (m_Adapter, D3DFMT_R5G6B5); -#endif if (Args->CheckParm ("-2")) { // Force all modes to be pixel-doubled. 
ScaleModes (1); From 4f635983fcf791b52535b9c597bdc4e823fef635 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 1 Jun 2016 08:54:39 +0200 Subject: [PATCH 009/100] Add bgra support to OS X target --- src/posix/cocoa/i_video.mm | 30 +++++++++++++++++++++--------- src/r_main.cpp | 1 - src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 6 ++++++ 4 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index c2eb58c6d..425fe5887 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -249,7 +249,7 @@ private: class CocoaFrameBuffer : public DFrameBuffer { public: - CocoaFrameBuffer(int width, int height, bool fullscreen); + CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen); ~CocoaFrameBuffer(); virtual bool Lock(bool buffer); @@ -536,7 +536,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c if (NULL != old) { - if (width == m_width && height == m_height) + if (width == m_width && height == m_height && bgra == old->IsBgra()) { SetMode(width, height, fullscreen, vid_hidpi); return old; @@ -553,7 +553,7 @@ DFrameBuffer* CocoaVideo::CreateFrameBuffer(const int width, const int height, c delete old; } - CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, fullscreen); + CocoaFrameBuffer* fb = new CocoaFrameBuffer(width, height, bgra, fullscreen); fb->SetFlash(flashColor, flashAmount); SetMode(width, height, fullscreen, vid_hidpi); @@ -772,8 +772,8 @@ CocoaVideo* CocoaVideo::GetInstance() } -CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool fullscreen) -: DFrameBuffer(width, height, false) +CocoaFrameBuffer::CocoaFrameBuffer(int width, int height, bool bgra, bool fullscreen) +: DFrameBuffer(width, height, bgra) , m_needPaletteUpdate(false) , m_gamma(0.0f) , m_needGammaUpdate(false) @@ -867,8 +867,18 @@ void CocoaFrameBuffer::Update() FlipCycles.Reset(); BlitCycles.Clock(); - GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, 
Width * BYTES_PER_PIXEL, - Width, Height, FRACUNIT, FRACUNIT, 0, 0); + if (IsBgra()) + { + for (int y = 0; y < Height; y++) + { + memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); + } + } + else + { + GPfx.Convert(MemBuffer, Pitch, m_pixelBuffer, Width * BYTES_PER_PIXEL, + Width, Height, FRACUNIT, FRACUNIT, 0, 0); + } FlipCycles.Clock(); Flip(); @@ -1000,8 +1010,10 @@ void CocoaFrameBuffer::Flip() static const GLenum format = GL_ABGR_EXT; #endif // __LITTLE_ENDIAN__ - glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, - Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); + if (IsBgra()) + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, GL_BGRA_EXT, GL_UNSIGNED_BYTE, m_pixelBuffer); + else + glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0, GL_RGBA8, Width, Height, 0, format, GL_UNSIGNED_BYTE, m_pixelBuffer); glBegin(GL_QUADS); glColor4f(1.0f, 1.0f, 1.0f, 1.0f); diff --git a/src/r_main.cpp b/src/r_main.cpp index aec8310d5..aaf8fc532 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -984,7 +984,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, if (r_swtruecolor != canvas->IsBgra()) { - r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 15e2fda8f..ee6ac5fed 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -58,6 +58,7 @@ void R_InitRenderer(); void FSoftwareRenderer::Init() { + r_swtruecolor = screen->IsBgra(); R_InitRenderer(); } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index b4f1ad4b5..57fac3cda 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -129,6 +129,12 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; + if (r_swtruecolor != IsBgra()) + { + r_swtruecolor = IsBgra(); + R_InitColumnDrawers(); + } + if (parms.masked) { spanptr = &spans; From 
47f32d03cd2d0b8966361b49acb5d0b6de40b94b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 14:49:03 +0200 Subject: [PATCH 010/100] Fixed some light and blending functions for the true color mode --- src/r_draw.cpp | 283 +++++++++++++++++++++++++------------------ src/r_draw.h | 2 + src/r_drawt_rgba.cpp | 76 +++++++----- src/r_main.h | 13 +- src/r_plane.cpp | 48 ++++++-- src/r_segs.cpp | 41 +++---- src/r_things.cpp | 13 +- src/v_draw.cpp | 6 +- 8 files changed, 284 insertions(+), 198 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index d7b740973..5a314e640 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -155,6 +155,8 @@ int dc_color; // [RH] Color for column filler DWORD dc_srccolor; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups +fixed_t dc_srcalpha; // Alpha value used by dc_srcblend +fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; @@ -414,9 +416,10 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -478,9 +481,10 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -541,9 +545,10 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = 
(dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -604,9 +609,10 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg_red = (dc_srccolor >> 12) & 0xf8; - uint32_t fg_green = (dc_srccolor >> 2) & 0xf8; - uint32_t fg_blue = (dc_srccolor << 3) & 0xf8; + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg_red = (fg >> 24) & 0xff; + uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg_blue = fg & 0xff; do { @@ -946,6 +952,9 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); @@ -958,9 +967,9 @@ void R_DrawAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1107,6 +1116,9 @@ void R_DrawTlatedAddColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1119,9 +1131,9 @@ void 
R_DrawTlatedAddColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1276,6 +1288,9 @@ void R_DrawAddClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1287,9 +1302,9 @@ void R_DrawAddClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1363,6 +1378,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1374,9 +1392,9 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - 
uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1447,6 +1465,9 @@ void R_DrawSubClampColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); @@ -1458,9 +1479,9 @@ void R_DrawSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1533,6 +1554,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1544,9 +1568,9 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = 
(*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1616,6 +1640,8 @@ void R_DrawRevSubClampColumnP_RGBA_C() const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { @@ -1628,9 +1654,9 @@ void R_DrawRevSubClampColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1703,6 +1729,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); @@ -1714,9 
+1743,9 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -1806,8 +1835,7 @@ void R_SetSpanSource(const BYTE *pixels) void R_SetSpanColormap(BYTE *colormap) { - ds_colormap = colormap; - ds_light = 0; + R_SetDSColorMapLight(colormap, 0, 0); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2316,8 +2344,6 @@ void R_DrawSpanTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2331,6 +2357,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -2347,9 +2376,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2375,9 +2404,9 @@ void R_DrawSpanTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2474,11 +2503,12 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2509,9 +2539,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ 
-2542,9 +2572,9 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2636,11 +2666,12 @@ void R_DrawSpanAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2667,9 +2698,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2695,9 +2726,9 @@ void R_DrawSpanAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha 
+ bg_blue * bg_alpha) / 256, 0, 255); *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -2800,11 +2831,12 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() const BYTE* colormap = ds_colormap; int count; int spot; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; uint32_t light = calc_light_multiplier(ds_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2835,9 +2867,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -2868,9 +2900,9 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3468,8 +3500,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3494,8 +3525,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = 
lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3594,8 +3624,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - dc_colormap = basecolormapdata + (rcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3620,8 +3649,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - dc_colormap = basecolormapdata + (lcolormap << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, (float)light, wallshade); } else { @@ -3717,11 +3745,11 @@ fixed_t tmvline1_add_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3736,9 +3764,9 @@ fixed_t tmvline1_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3794,6 +3822,9 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; ++i) @@ -3810,9 +3841,9 @@ void tmvline4_add_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3871,11 +3902,11 @@ fixed_t tmvline1_addclamp_RGBA() int bits = tmvlinebits; int pitch = dc_pitch; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -3890,9 +3921,9 @@ fixed_t tmvline1_addclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -3947,6 +3978,9 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -3963,9 +3997,9 @@ void tmvline4_addclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 
0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4023,6 +4057,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4037,9 +4074,9 @@ fixed_t tmvline1_subclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4093,6 +4130,9 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4109,9 +4149,9 @@ void tmvline4_subclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 
256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4169,6 +4209,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { BYTE pix = source[frac >> bits]; @@ -4183,9 +4226,9 @@ fixed_t tmvline1_revsubclamp_RGBA() uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4233,15 +4276,15 @@ void tmvline4_revsubclamp_RGBA() int count = dc_count; int bits = tmvlinebits; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; light[0] = calc_light_multiplier(palookuplight[0]); light[1] = calc_light_multiplier(palookuplight[1]); light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; ++i) @@ -4258,9 
+4301,9 @@ void tmvline4_revsubclamp_RGBA() uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -4558,16 +4601,22 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) { dc_srcblend = Col2RGB8_Inverse[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) { dc_srcblend = Col2RGB8[fglevel>>10]; dc_destblend = Col2RGB8[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } else { dc_srcblend = Col2RGB8_LessPrecision[fglevel>>10]; dc_destblend = Col2RGB8_LessPrecision[bglevel>>10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; } switch (op) { @@ -4736,12 +4785,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - dc_colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - dc_colormap += fixedlightlev; + R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + } + else + { + R_SetColorMapLight(colormap, 0, 0); } - dc_light = 0; return r_columnmethod ? 
DoDraw1 : DoDraw0; } @@ -4766,8 +4818,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) diff --git a/src/r_draw.h b/src/r_draw.h index d5007c885..f60b2299e 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -41,6 +41,8 @@ extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; +extern "C" fixed_t dc_srcalpha; +extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 872cb4b89..1725b80e4 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -308,6 +308,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -318,9 +321,9 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; @@ -350,6 +353,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t 
light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -362,9 +368,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -496,6 +502,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -506,9 +515,9 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -537,6 +546,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do 
{ for (int i = 0; i < 4; i++) { @@ -549,9 +561,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -595,6 +607,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -605,9 +620,9 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -636,6 +651,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 
0; i < 4; i++) { @@ -648,9 +666,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -688,8 +706,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; @@ -697,6 +713,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { uint32_t fg = shade_pal_index(colormap[*source], light); uint32_t fg_red = (fg >> 16) & 0xff; @@ -707,9 +726,9 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + 
fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; source += 4; @@ -731,8 +750,6 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) return; count++; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; @@ -740,6 +757,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + do { for (int i = 0; i < 4; i++) { @@ -752,9 +772,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) uint32_t bg_green = (dest[i] >> 8) & 0xff; uint32_t bg_blue = (dest[i]) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 256; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 256; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 256; + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } diff --git a/src/r_main.h b/src/r_main.h index 765635e5d..e8be3c1a3 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -82,15 +82,18 @@ extern bool r_dontmaplines; // Change R_CalcTiltedLighting() when this changes. 
#define GETPALOOKUP(vis,shade) (clamp (((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis))))>>FRACBITS, 0, NUMCOLORMAPS-1)) -// Calculate the light multiplier for ds_light -// This is used instead of GETPALOOKUP when ds_colormap+dc_colormap is set to the base colormap -#define LIGHTSCALE(vis,shade) ((shade)-FLOAT2FIXED(MIN(MAXLIGHTVIS,double(vis)))) +// Calculate the light multiplier for dc_light/ds_light +// This is used instead of GETPALOOKUP when ds_colormap/dc_colormap is set to the base colormap +// Returns a value between 0 and 1 in fixed point +#define LIGHTSCALE(vis,shade) FLOAT2FIXED(clamp((FIXED2DBL(shade) - (MIN(MAXLIGHTVIS,double(vis)))) / NUMCOLORMAPS, 0.0, (NUMCOLORMAPS-1)/(double)NUMCOLORMAPS)) + +// Converts fixedlightlev into a shade value +#define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { - // the 0.70 multiplier shouldn't be needed - maybe the palette shades in doom weren't linear? 
- return (uint32_t)clamp((1.0 - FIXED2DBL(light) / MAXLIGHTVIS * 0.70) * 256 + 0.5, 0.0, 256.0); + return 256 - (light >> (FRACBITS - 8)); } // Calculates a ARGB8 color for the given palette index and light multiplier diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 40e14c020..9805ab200 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -391,8 +391,7 @@ void R_MapTiltedPlane_C (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -515,8 +514,7 @@ void R_MapTiltedPlane_RGBA (int y, int x1) u = SQWORD(uz*z) + pviewx; v = SQWORD(vz*z) + pviewy; - ds_colormap = tiltlighting[i]; - ds_light = 0; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; iz += plane_sz[0]; uz += plane_su[0]; @@ -1595,14 +1593,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); } else { fakefixed = true; - fixedcolormap = dc_colormap = NormalLight.Maps; - dc_light = 0; + fixedcolormap = NormalLight.Maps; + R_SetColorMapLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1685,11 +1682,19 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t GlobVis = r_FloorVisibility / planeheight; ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else + { plane_shade = true; + } if (spanfunc != R_FillSpan) { @@ -1702,12 +1707,16 @@ void 
R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanMaskedTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanMaskedAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1724,12 +1733,16 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t spanfunc = R_DrawSpanTranslucent; dc_srcblend = Col2RGB8[alpha>>10]; dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } else { spanfunc = R_DrawSpanAddClamp; dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; } } else @@ -1846,11 +1859,20 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) - ds_colormap = basecolormap->Maps + fixedlightlev, plane_shade = false; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + plane_shade = false; + } else if (fixedcolormap) - ds_colormap = fixedcolormap, plane_shade = false; + { + R_SetDSColorMapLight(fixedcolormap, 0, 0); + plane_shade = false; + } else - ds_colormap = basecolormap->Maps, plane_shade = true; + { + R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + plane_shade = true; + } if (!plane_shade) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index cab97adfc..43590247e 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,8 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - dc_colormap = basecolormap->Maps + (GETPALOOKUP (rw_light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + 
R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -314,10 +313,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -632,10 +630,9 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -1435,11 +1432,11 @@ static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *d } } -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) { dc_iscale = vince; dc_colormap = colormap; - dc_light = 0; + dc_light = light; dc_count = count; dc_texturefrac = vplce; dc_source = bufplce; @@ -1508,8 +1505,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1569,7 +1565,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (!(bad & 1)) { - 
mvline1(vince[z],palookupoffse[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } bad >>= 1; } @@ -1580,7 +1576,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (u4 > y1ve[z]) { - vplce[z] = mvline1(vince[z],palookupoffse[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); + vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); } } @@ -1596,7 +1592,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ { if (y2ve[z] > d4) { - mvline1(vince[z],palookupoffse[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); + mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); } } } @@ -1609,8 +1605,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - dc_colormap = basecolormapdata + (GETPALOOKUP (light, wallshade) << COLORMAPSHIFT); - dc_light = 0; + R_SetColorMapLight(basecolormapdata, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1844,10 +1839,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - dc_colormap = basecolormap->Maps + fixedlightlev; + R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; - dc_light = 0; + R_SetColorMapLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -3244,14 +3238,13 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + 
R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it if (decal->RenderFlags & RF_YFLIP) diff --git a/src/r_things.cpp b/src/r_things.cpp index f52c80376..98557817d 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,8 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - dc_colormap = vis->Style.colormap; - dc_light = 0; + R_SetColorMapLight(vis->Style.colormap, 0, 0); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -539,14 +538,13 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - dc_colormap = usecolormap->Maps + fixedlightlev; + R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - dc_colormap = fixedcolormap; + R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - dc_colormap = usecolormap->Maps; + R_SetColorMapLight(usecolormap->Maps, 0, 0); else calclighting = true; - dc_light = 0; // Draw it WallSpriteTile = spr->pic; @@ -656,8 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- dc_colormap = spr->Style.colormap; - dc_light = 0; + R_SetColorMapLight(spr->Style.colormap, 0, 0); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 57fac3cda..ff0427b34 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,13 +171,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - dc_colormap = (lighttable_t *)translation; - dc_light = 0; + R_SetColorMapLight((lighttable_t *)translation, 0, 0); } else { - dc_colormap = identitymap; - dc_light = 0; + R_SetColorMapLight(identitymap, 0, 0); } fixedcolormap = dc_colormap; From 41537a50ab9f9aeb5f07e121ed8d1396dd7d261a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 16:52:41 +0200 Subject: [PATCH 011/100] Fix true color light calculation bug for decals --- src/r_draw.cpp | 8 ++++---- src/r_drawt_rgba.cpp | 4 ++-- src/r_things.cpp | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 5a314e640..c190c1e73 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -416,7 +416,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +481,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +545,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = 
shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +609,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 1725b80e4..60520783d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -413,7 +413,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -455,7 +455,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.cpp b/src/r_things.cpp index 98557817d..3fcefe038 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- dc_colormap += vis->ColormapNum << COLORMAPSHIFT; + R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -2704,9 +2704,9 @@ void R_DrawParticle_RGBA(vissprite_t *vis) uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red * alpha) / 256; - uint32_t green = (fg_green + bg_green * alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * alpha) / 256; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += spacing; From 672b80898b720c03ea10367259cc7b524cc4bead Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 19:26:27 +0200 Subject: [PATCH 012/100] Moved ColormapNum to visstyle_t and changed colormap to BaseColormap --- src/g_shared/a_artifacts.cpp | 3 +- src/r_defs.h | 3 +- src/r_things.cpp | 115 ++++++++++++++++++++--------------- src/r_things.h | 1 - 4 files changed, 70 insertions(+), 52 deletions(-) diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index d36cdfe65..777d6824a 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,8 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->colormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency } diff --git a/src/r_defs.h b/src/r_defs.h index f27ac2716..8a247a5c0 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1401,7 +1401,8 @@ typedef BYTE lighttable_t; // This could be wider for >8 bit display. 
// This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { - lighttable_t *colormap; + int ColormapNum; // Which colormap is rendered + lighttable_t *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_things.cpp b/src/r_things.cpp index 3fcefe038..6f8038148 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -408,7 +408,7 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.colormap, 0, 0); + R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. - R_SetColorMapLight(dc_colormap, 0, vis->ColormapNum << FRACBITS); + R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -654,7 +654,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. - R_SetColorMapLight(spr->Style.colormap, 0, 0); + R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.colormap, cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1058,7 +1058,7 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = fakefloor; vis->fakeceiling = fakeceiling; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->bSplitSprite = false; @@ -1110,7 +1110,8 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor // get light level if (fixedcolormap != NULL) { // fixed map - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else { @@ -1120,17 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.colormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = 0; } else { // diminished light - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = mybasecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; } } } @@ -1199,14 +1202,13 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->Style.Alpha = float(thing->Alpha); vis->fakefloor = NULL; vis->fakeceiling = 
NULL; - vis->ColormapNum = 0; vis->bInMirror = MirrorFlags & RF_XFLIP; vis->pic = pic; vis->bIsVoxel = false; vis->bWallSprite = true; - vis->ColormapNum = GETPALOOKUP( + vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.colormap = basecolormap->Maps + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.BaseColormap = basecolormap->Maps; vis->wallc = wallc; } @@ -1376,7 +1378,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double vis->yscale = float(pspriteyscale / tex->Scale.Y); vis->Translation = 0; // [RH] Use default colors vis->pic = tex; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (flip) { @@ -1426,7 +1428,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.colormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.ColormapNum = 0; } else { @@ -1436,35 +1439,39 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.colormap = mybasecolormap->Maps + fixedlightlev; + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.colormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.ColormapNum = 0; } else { // local light - vis->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP (0, spriteshade) << COLORMAPSHIFT); + vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { - lighttable_t *oldcolormap = vis->Style.colormap; + BYTE oldcolormapnum = vis->Style.ColormapNum; + lighttable_t *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite 
(&vis->Style); - if (vis->Style.colormap != oldcolormap) + if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.colormap < SpecialColormaps[0].Colormap || - vis->Style.colormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || + vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.colormap < mybasecolormap->Maps || - vis->Style.colormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap < mybasecolormap->Maps || + vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) { noaccel = true; } @@ -1472,8 +1479,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) { noaccel = true; } @@ -1495,7 +1502,8 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.colormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1641,18 +1649,18 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.colormap >= SpecialColormaps[0].Colormap && - vis->Style.colormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && + vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) { // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.colormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap); + ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; special = &SpecialColormaps[specialmap]; } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) { overlay = colormap->Fade; - overlay.a = BYTE(((vis->Style.colormap - colormap->Maps) >> 8) * 255 / NUMCOLORMAPS); + overlay.a = BYTE(vis->Style.ColormapNum * 255 / NUMCOLORMAPS); } else { @@ -1660,7 +1668,7 @@ void R_DrawRemainingPlayerSprites() colormapstyle.Color = colormap->Color; colormapstyle.Fade = colormap->Fade; colormapstyle.Desaturate = colormap->Desaturate; - colormapstyle.FadeLevel = ((vis->Style.colormap - colormap->Maps) >> 8) / float(NUMCOLORMAPS); + colormapstyle.FadeLevel = vis->Style.ColormapNum / float(NUMCOLORMAPS); } screen->DrawTexture(vis->pic, viewwindowx + VisPSpritesX1[i], @@ -1904,7 +1912,8 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.colormap; + lighttable_t *colormap = spr->Style.BaseColormap; + int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2001,17 +2010,19 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.colormap = mybasecolormap->Maps + fixedlightlev; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.colormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.colormap = mybasecolormap->Maps + (GETPALOOKUP ( - r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade) << COLORMAPSHIFT); + 
spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } } @@ -2159,7 +2170,8 @@ void R_DrawSprite (vissprite_t *spr) if (topclip >= botclip) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } @@ -2289,7 +2301,8 @@ void R_DrawSprite (vissprite_t *spr) } if (i == x2) { - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; return; } } @@ -2307,7 +2320,8 @@ void R_DrawSprite (vissprite_t *spr) int maxvoxely = spr->gzb > hzb ? INT_MAX : xs_RoundToInt((spr->gzt - hzb) / spr->yscale); R_DrawVisVoxel(spr, minvoxely, maxvoxely, cliptop, clipbot); } - spr->Style.colormap = colormap; + spr->Style.BaseColormap = colormap; + spr->Style.ColormapNum = colormapnum; } // kg3D: @@ -2551,25 +2565,28 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, vis->renderflags = particle->trans; vis->FakeFlatStat = fakeside; vis->floorclip = 0; - vis->ColormapNum = 0; + vis->Style.ColormapNum = 0; if (fixedlightlev >= 0) { - vis->Style.colormap = map + fixedlightlev; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (fixedcolormap) { - vis->Style.colormap = fixedcolormap; + vis->Style.BaseColormap = fixedcolormap; + vis->Style.ColormapNum = 0; } else if (particle->bright) { - vis->Style.colormap = map; + vis->Style.BaseColormap = map; + vis->Style.ColormapNum = 0; } else { // Particles are slightly more visible than regular sprites. 
- vis->ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); - vis->Style.colormap = map + (vis->ColormapNum << COLORMAPSHIFT); + vis->Style.ColormapNum = GETPALOOKUP(tiz * r_SpriteVisibility * 0.5, shade); + vis->Style.BaseColormap = map; } } @@ -2602,7 +2619,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2668,7 +2685,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.colormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2676,7 +2693,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_things.h b/src/r_things.h index 057b7cfe2..785729b09 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -86,7 +86,6 @@ struct vissprite_t BYTE bSplitSprite:1; // [RH] Sprite was split by a drawseg BYTE bInMirror:1; // [RH] Sprite is "inside" a mirror BYTE FakeFlatStat; // [RH] which side of fake/floor ceiling sprite is on - BYTE ColormapNum; // Which colormap is rendered (needed for shaded drawer) short renderflags; DWORD Translation; // [RH] for color translation visstyle_t Style; From 02a39ef4576204463474a71beef359848cac032d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 20:05:08 +0200 Subject: [PATCH 013/100] Added bgra support to SDL target --- src/posix/sdl/sdlvideo.cpp | 46 ++++++++++++++++++++++++++++---------- 1 file 
changed, 34 insertions(+), 12 deletions(-) diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index b050097be..26121aa71 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -28,7 +28,7 @@ class SDLFB : public DFrameBuffer { DECLARE_CLASS(SDLFB, DFrameBuffer) public: - SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin); + SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin); ~SDLFB (); bool Lock (bool buffer); @@ -271,7 +271,8 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo { // Reuse the old framebuffer if its attributes are the same SDLFB *fb = static_cast (old); if (fb->Width == width && - fb->Height == height) + fb->Height == height && + fb->Bgra == bgra) { bool fsnow = (SDL_GetWindowFlags (fb->Screen) & SDL_WINDOW_FULLSCREEN_DESKTOP) != 0; @@ -296,7 +297,7 @@ DFrameBuffer *SDLVideo::CreateFrameBuffer (int width, int height, bool bgra, boo flashAmount = 0; } - SDLFB *fb = new SDLFB (width, height, fullscreen, oldwin); + SDLFB *fb = new SDLFB (width, height, bgra, fullscreen, oldwin); // If we could not create the framebuffer, try again with slightly // different parameters in this order: @@ -350,8 +351,8 @@ void SDLVideo::SetWindowedScale (float scale) // FrameBuffer implementation ----------------------------------------------- -SDLFB::SDLFB (int width, int height, bool fullscreen, SDL_Window *oldwin) - : DFrameBuffer (width, height, false) +SDLFB::SDLFB (int width, int height, bool bgra, bool fullscreen, SDL_Window *oldwin) + : DFrameBuffer (width, height, bgra) { int i; @@ -494,7 +495,21 @@ void SDLFB::Update () pitch = Surface->pitch; } - if (NotPaletted) + if (Bgra) + { + if (pitch == Pitch * 4) + { + memcpy(pixels, MemBuffer, Width*Height*4); + } + else + { + for (int y = 0; y < Height; ++y) + { + memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); + } + } + } + else if (NotPaletted) { GPfx.Convert (MemBuffer, Pitch, pixels, 
pitch, Width, Height, @@ -674,13 +689,20 @@ void SDLFB::ResetSDLRenderer () SDL_SetRenderDrawColor(Renderer, 0, 0, 0, 255); Uint32 fmt; - switch(vid_displaybits) + if (Bgra) { - default: fmt = SDL_PIXELFORMAT_ARGB8888; break; - case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; - case 24: fmt = SDL_PIXELFORMAT_RGB888; break; - case 16: fmt = SDL_PIXELFORMAT_RGB565; break; - case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + fmt = SDL_PIXELFORMAT_ARGB8888; + } + else + { + switch (vid_displaybits) + { + default: fmt = SDL_PIXELFORMAT_ARGB8888; break; + case 30: fmt = SDL_PIXELFORMAT_ARGB2101010; break; + case 24: fmt = SDL_PIXELFORMAT_RGB888; break; + case 16: fmt = SDL_PIXELFORMAT_RGB565; break; + case 15: fmt = SDL_PIXELFORMAT_ARGB1555; break; + } } Texture = SDL_CreateTexture (Renderer, fmt, SDL_TEXTUREACCESS_STREAMING, Width, Height); From 7142faf41d8cd250a19cd86af9becd0ae2d79e32 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 2 Jun 2016 21:39:44 +0200 Subject: [PATCH 014/100] Minor compile error fixes when X86_ASM is defined --- src/r_draw.cpp | 6 +++--- src/r_draw.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index c190c1e73..2c2c67ad6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3025,9 +3025,11 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C +static int vlinebits; +static int mvlinebits; + #ifndef X86_ASM static DWORD vlinec1 (); -static int vlinebits; DWORD (*dovline1)() = vlinec1; DWORD (*doprevline1)() = vlinec1; @@ -3043,7 +3045,6 @@ void (*dovline4)() = vlinec4; static DWORD mvlinec1(); static void mvlinec4(); -static int mvlinebits; DWORD (*domvline1)() = mvlinec1; void (*domvline4)() = mvlinec4; @@ -4532,7 +4533,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_c; rt_tlatesubclamp1col = rt_tlatesubclamp1col_c; rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_c; - rt_map4cols = rt_map4cols_c; 
rt_subclamp4cols = rt_subclamp4cols_c; rt_revsubclamp4cols = rt_revsubclamp4cols_c; rt_tlate4cols = rt_tlate4cols_c; diff --git a/src/r_draw.h b/src/r_draw.h index f60b2299e..fcaedff47 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -265,6 +265,8 @@ void R_DrawShadedColumnP_C (void); void R_DrawSpanP_C (void); void R_DrawSpanMaskedP_C (void); +#endif + void R_DrawColumnHorizP_RGBA_C (void); void R_DrawColumnP_RGBA_C (void); void R_DrawFuzzColumnP_RGBA_C (void); @@ -274,8 +276,6 @@ void R_DrawSpanP_RGBA_C (void); void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); -#endif - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); From 6160675e080355f35206af2164306a29be3af4be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 12:42:08 +0200 Subject: [PATCH 015/100] Added a few more SSE drawers --- src/r_draw.cpp | 17 ++- src/r_draw.h | 4 + src/r_drawt_rgba.cpp | 273 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 5 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2c2c67ad6..aed4bbeea 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -4364,7 +4364,6 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - rt_map4cols = rt_map4cols_RGBA_c; #ifndef NO_SSE R_DrawSpan = R_DrawSpanP_RGBA_SSE; #else @@ -4409,9 +4408,6 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; rt_add1col = rt_add1col_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; @@ -4422,7 +4418,6 @@ void R_InitColumnDrawers () rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; 
rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; rt_subclamp4cols = rt_subclamp4cols_RGBA_c; rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; rt_tlate4cols = rt_tlate4cols_RGBA_c; @@ -4432,6 +4427,18 @@ void R_InitColumnDrawers () rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; +#ifndef NO_SSE + rt_map4cols = rt_map4cols_RGBA_SSE; + rt_add4cols = rt_add4cols_RGBA_SSE; + rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; + rt_shaded4cols = rt_shaded4cols_RGBA_SSE; +#else + rt_map4cols = rt_map4cols_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; +#endif + dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; diff --git a/src/r_draw.h b/src/r_draw.h index fcaedff47..27a985dcb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -173,6 +173,7 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); +void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -187,8 +188,11 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); +void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); +void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); +void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 60520783d..d390fc54d 100644 --- a/src/r_drawt_rgba.cpp +++ 
b/src/r_drawt_rgba.cpp @@ -42,6 +42,9 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#ifndef NO_SSE +#include +#endif uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; uint32_t *dc_temp_rgba; @@ -185,6 +188,98 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Maps all four spans to the screen starting at sx. +void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = 
_mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[4]]; + uint32_t p1 = colormap[source[5]]; + uint32_t p2 = colormap[source[6]]; + uint32_t p3 = colormap[source[7]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + fg = _mm_packus_epi16(fg_lo, fg_hi); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); +} + void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { int count = yh - yl + 1; @@ -380,6 +475,69 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx without clamping. 
+#ifndef NO_SSE +void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + 
_mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -481,6 +639,58 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Shades all four spans to the screen starting at sx. +#ifndef NO_SSE +void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} 
+#endif + // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -572,6 +782,69 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) } while (--count); } +// Adds all four spans to the screen starting at sx with clamping. +#ifndef NO_SSE +void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) +{ + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + colormap = dc_colormap; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + fg_hi = _mm_mullo_epi16(fg_hi, mlight); + fg_hi = _mm_srli_epi16(fg_hi, 8); + fg_lo = _mm_mullo_epi16(fg_lo, mlight); + fg_lo = _mm_srli_epi16(fg_lo, 8); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); +} +#endif + // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { From 373b59b94fa93b78527c50a0af9aea84e09a569b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 14:06:44 +0200 Subject: [PATCH 016/100] Fix dovline4 being a define on X64_ASM --- src/r_draw.cpp | 2 +- src/r_draw.h | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index aed4bbeea..ccaa864e6 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -3036,8 +3036,8 @@ DWORD (*doprevline1)() = vlinec1; #ifdef X64_ASM extern "C" void vlinetallasm4(); -#define dovline4 vlinetallasm4 extern "C" void setupvlinetallasm (int); +void (*dovline4)() = vlinetallasm4; #else static void vlinec4 (); void (*dovline4)() = vlinec4; diff --git a/src/r_draw.h b/src/r_draw.h index 27a985dcb..2eefff9bd 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -71,12 +71,7 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); -#ifdef X64_ASM -#define dovline4 vlinetallasm4 -extern "C" void vlinetallasm4(); -#else extern void (*dovline4) (); -#endif extern void setupvline (int); extern DWORD (*domvline1) (); From af02bafdeb4a96e091f6ff8608d3d82278bf7c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 3 Jun 2016 22:57:36 +0200 Subject: [PATCH 017/100] Fixed missing some columns in transparency rendering --- src/r_draw.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ccaa864e6..ec0645fd2 
100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -952,12 +952,14 @@ void R_DrawAddColumnP_RGBA_C() int pitch = dc_pitch; BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], 0); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; From 0c8c9e0aeace39987a44183ff16670e5ea967007 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 14:08:03 +0200 Subject: [PATCH 018/100] Added FDynamicColormap support to true color mode --- src/g_level.cpp | 2 +- src/g_shared/a_artifacts.cpp | 2 +- src/r_data/colormaps.cpp | 20 +- src/r_data/colormaps.h | 25 +- src/r_defs.h | 3 +- src/r_draw.cpp | 592 ++++++++++++++++++++++++----------- src/r_draw.h | 19 +- src/r_drawt_rgba.cpp | 427 +++++++++++++++---------- src/r_main.cpp | 8 +- src/r_main.h | 141 ++++++++- src/r_plane.cpp | 12 +- src/r_segs.cpp | 45 ++- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 79 +++-- src/r_utility.cpp | 4 +- src/v_draw.cpp | 13 +- 16 files changed, 937 insertions(+), 457 deletions(-) diff --git a/src/g_level.cpp b/src/g_level.cpp index 141932c22..d27747ccb 100644 --- a/src/g_level.cpp +++ b/src/g_level.cpp @@ -1307,7 +1307,7 @@ void G_InitLevelLocals () level_info_t *info; BaseBlendA = 0.0f; // Remove underwater blend effect, if any - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; // [BB] Instead of just setting the color, we also have to reset Desaturate and build the lights. 
NormalLight.ChangeColor (PalEntry (255, 255, 255), 0); diff --git a/src/g_shared/a_artifacts.cpp b/src/g_shared/a_artifacts.cpp index 777d6824a..305260ebf 100644 --- a/src/g_shared/a_artifacts.cpp +++ b/src/g_shared/a_artifacts.cpp @@ -737,7 +737,7 @@ int APowerInvisibility::AlterWeaponSprite (visstyle_t *vis) if ((vis->Alpha < 0.25f && special1 > 0) || (vis->Alpha == 0)) { vis->Alpha = clamp((1.f - float(Strength/100)), 0.f, 1.f); - vis->BaseColormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + vis->BaseColormap = &SpecialColormaps[INVERSECOLORMAP]; vis->ColormapNum = 0; } return -1; // This item is valid so another one shouldn't reset the translucency diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index b46342463..ffaaa38ac 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -BYTE *realcolormaps; +FColormap realcolormaps; size_t numfakecmaps; @@ -408,7 +408,7 @@ void R_SetDefaultColormap (const char *name) foo.Color = 0xFFFFFF; foo.Fade = 0; - foo.Maps = realcolormaps; + foo.Maps = realcolormaps.Maps; foo.Desaturate = 0; foo.Next = NULL; foo.BuildLights (); @@ -430,7 +430,7 @@ void R_SetDefaultColormap (const char *name) remap[0] = 0; for (i = 0; i < NUMCOLORMAPS; ++i) { - BYTE *map2 = &realcolormaps[i*256]; + BYTE *map2 = &realcolormaps.Maps[i*256]; lumpr.Read (map, 256); for (j = 0; j < 256; ++j) { @@ -454,11 +454,7 @@ void R_DeinitColormaps () { SpecialColormaps.Clear(); fakecmaps.Clear(); - if (realcolormaps != NULL) - { - delete[] realcolormaps; - realcolormaps = NULL; - } + delete[] realcolormaps.Maps; FreeSpecialLights(); } @@ -501,7 +497,7 @@ void R_InitColormaps () } } } - realcolormaps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; + realcolormaps.Maps = new BYTE[256*NUMCOLORMAPS*fakecmaps.Size()]; R_SetDefaultColormap ("COLORMAP"); if (fakecmaps.Size() > 1) @@ -523,7 +519,7 @@ void R_InitColormaps () { int k, r, g, b; FWadLump lump = Wads.OpenLumpNum 
(fakecmaps[j].lump); - BYTE *const map = realcolormaps + NUMCOLORMAPS*256*j; + BYTE *const map = realcolormaps.Maps + NUMCOLORMAPS*256*j; for (k = 0; k < NUMCOLORMAPS; ++k) { @@ -550,8 +546,8 @@ void R_InitColormaps () } NormalLight.Color = PalEntry (255, 255, 255); NormalLight.Fade = 0; - NormalLight.Maps = realcolormaps; - NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps); + NormalLight.Maps = realcolormaps.Maps; + NormalLightHasFixedLights = R_CheckForFixedLights(realcolormaps.Maps); numfakecmaps = fakecmaps.Size(); // build default special maps (e.g. invulnerability) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 0764191a3..bda6a5ea4 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,18 +1,26 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H +struct FColormap; + void R_InitColormaps (); void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap (const char *name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern BYTE *realcolormaps; // [RH] make the colormaps externally visible +extern FColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; +struct FColormap +{ + BYTE *Maps = nullptr; + PalEntry Color = 0xffffffff; + PalEntry Fade = 0xff000000; + int Desaturate = 0; +}; - -struct FDynamicColormap +struct FDynamicColormap : FColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); @@ -20,10 +28,6 @@ struct FDynamicColormap void BuildLights (); static void RebuildAllLights(); - BYTE *Maps; - PalEntry Color; - PalEntry Fade; - int Desaturate; FDynamicColormap *Next; }; @@ -43,8 +47,13 @@ enum }; -struct FSpecialColormap +struct FSpecialColormap : FColormap { + FSpecialColormap() + { + Maps = Colormap; + } + float ColorizeStart[3]; float ColorizeEnd[3]; BYTE Colormap[256]; diff --git 
a/src/r_defs.h b/src/r_defs.h index 8a247a5c0..c0f878664 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1397,12 +1397,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. +struct FColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - lighttable_t *BaseColormap; // Base colormap used together with ColormapNum + FColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec0645fd2..2e21c7038 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -145,6 +145,8 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; +FColormap *dc_fcolormap; +ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; int dc_yl; @@ -179,6 +181,7 @@ BYTE *dc_translation; BYTE shadetables[NUMCOLORMAPS*16*256]; FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; +FDynamicColormap identitycolormap; EXTERN_CVAR (Int, r_columnmethod) @@ -219,6 +222,10 @@ void R_InitShadeMaps() { identitymap[i] = i; } + identitycolormap.Color = ~0u; + identitycolormap.Desaturate = 0; + identitycolormap.Next = NULL; + identitycolormap.Maps = identitymap; } /************************************/ @@ -297,6 +304,7 @@ void R_DrawColumnP_RGBA_C() dest = (uint32_t*)dc_dest; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. @@ -315,9 +323,7 @@ void R_DrawColumnP_RGBA_C() // This is as fast as it gets. do { - // Re-map color indices from wall texture column - // using a lighting/special effects LUT. 
- *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); dest += pitch; frac += fracstep; @@ -371,7 +377,7 @@ void R_FillColumnP_RGBA() do { - *dest = shade_pal_index(color, light); + *dest = shade_pal_index_simple(color, light); dest += pitch; } while (--count); } @@ -416,7 +422,7 @@ void R_FillAddColumn_RGBA_C() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -481,7 +487,7 @@ void R_FillAddClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,7 +551,7 @@ void R_FillSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -609,7 +615,7 @@ void R_FillRevSubClampColumn_RGBA() dest = (uint32_t*)dc_dest; int pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -953,13 +959,14 @@ void R_DrawAddColumnP_RGBA_C() BYTE *colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = 
dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1032,6 +1039,7 @@ void R_DrawTranslatedColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1040,14 +1048,13 @@ void R_DrawTranslatedColumnP_RGBA_C() { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *colormap = dc_colormap; BYTE *translation = dc_translation; const BYTE *source = dc_source; int pitch = dc_pitch; do { - *dest = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); dest += pitch; frac += fracstep; } while (--count); @@ -1106,6 +1113,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() return; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; dest = (uint32_t*)dc_dest; @@ -1114,7 +1122,6 @@ void R_DrawTlatedAddColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; @@ -1123,7 +1130,7 @@ void R_DrawTlatedAddColumnP_RGBA_C() do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; @@ -1197,7 +1204,7 @@ void R_DrawShadedColumnP_RGBA_C() fracstep = dc_iscale; frac = dc_texturefrac; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1286,16 +1293,16 @@ void R_DrawAddClampColumnP_RGBA_C() { const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1375,17 +1382,17 @@ void R_DrawAddClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1462,17 +1469,17 @@ void R_DrawSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1551,17 +1558,17 @@ void R_DrawSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1638,16 +1645,16 @@ void R_DrawRevSubClampColumnP_RGBA_C() frac = dc_texturefrac; { - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light); + uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1726,17 +1733,17 @@ void R_DrawRevSubClampTranslatedColumnP_RGBA_C() { BYTE *translation = dc_translation; - BYTE *colormap = dc_colormap; const BYTE *source = dc_source; int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[translation[source[frac >> FRACBITS]]], light); + uint32_t fg = 
shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1784,8 +1791,10 @@ int ds_y; int ds_x1; int ds_x2; +FColormap* ds_fcolormap; lighttable_t* ds_colormap; -//dsfixed_t ds_light; +ShadeConstants ds_shade_constants; +dsfixed_t ds_light; dsfixed_t ds_xfrac; dsfixed_t ds_yfrac; @@ -1835,9 +1844,9 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(BYTE *colormap) +void R_SetSpanColormap(FDynamicColormap *colormap, int shade) { - R_SetDSColorMapLight(colormap, 0, 0); + R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -1956,7 +1965,6 @@ void R_DrawSpanP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -1980,6 +1988,7 @@ void R_DrawSpanP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { @@ -1990,9 +1999,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2010,9 +2018,8 @@ void R_DrawSpanP_RGBA_C() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2030,7 +2037,6 @@ void R_DrawSpanP_RGBA_SSE() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2054,54 +2060,92 @@ void R_DrawSpanP_RGBA_SSE() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; int sse_count = count / 4; count -= sse_count * 4; - while (sse_count--) + + if (shade_constants.simple_shade) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + SSE_SHADE_SIMPLE_INIT(light); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = colormap[source[spot]]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; - // Next step in u,v. - dest += 4; + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + if (count == 0) return; @@ -2110,9 +2154,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -2130,9 +2173,8 @@ void R_DrawSpanP_RGBA_SSE() // Current texture index in u,v. spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - *dest++ = shade_pal_index(colormap[source[spot]], light); + // Lookup pixel from flat texture tile + *dest++ = shade_pal_index(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -2221,6 +2263,7 @@ void R_DrawSpanMaskedP_RGBA_C() int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -2243,7 +2286,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2263,7 +2306,7 @@ void R_DrawSpanMaskedP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(colormap[texdata], light); + *dest = shade_pal_index(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -2343,7 +2386,6 @@ void R_DrawSpanTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; @@ -2358,6 +2400,7 @@ void R_DrawSpanTranslucentP_RGBA_C() ystep = ds_ystep; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2369,7 +2412,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2397,7 +2440,7 @@ void R_DrawSpanTranslucentP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2502,11 +2545,11 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const 
BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2532,7 +2575,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2565,7 +2608,7 @@ void R_DrawSpanMaskedTranslucentP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2665,11 +2708,11 @@ void R_DrawSpanAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2691,7 +2734,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2719,7 +2762,7 @@ void R_DrawSpanAddClampP_RGBA_C() { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(colormap[source[spot]], light); + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); uint32_t fg_red = 
(fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2830,11 +2873,11 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() dsfixed_t ystep; uint32_t* dest; const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; int count; int spot; uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -2860,7 +2903,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2893,7 +2936,7 @@ void R_DrawSpanMaskedAddClampP_RGBA_C() texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(colormap[texdata], light); + uint32_t fg = shade_pal_index(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2926,7 +2969,7 @@ void R_FillSpan_RGBA() uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3147,7 +3190,6 @@ DWORD vlinec1_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3155,10 +3197,11 @@ DWORD vlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { - *dest = shade_pal_index(colormap[source[frac >> bits]], light); + *dest = 
shade_pal_index(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -3197,12 +3240,14 @@ void vlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { - dest[0] = shade_pal_index(palookupoffse[0][bufplce[0][(place = vplce[0]) >> bits]], light0); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(palookupoffse[1][bufplce[1][(place = vplce[1]) >> bits]], light1); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(palookupoffse[2][bufplce[2][(place = vplce[2]) >> bits]], light2); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(palookupoffse[3][bufplce[3][(place = vplce[3]) >> bits]], light3); vplce[3] = place + vince[3]; + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3219,40 +3264,64 @@ void vlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if 
(shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE p0 = palookupoffse[0][bufplce[0][place0 >> bits]]; - BYTE p1 = palookupoffse[1][bufplce[1][place1 >> bits]]; - BYTE p2 = palookupoffse[2][bufplce[2][place2 >> bits]]; - BYTE p3 = palookupoffse[3][bufplce[3][place3 >> bits]]; + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = 
local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3323,13 +3392,14 @@ DWORD mvlinec1_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; do { BYTE pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(colormap[pix], light); + *dest = shade_pal_index(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -3370,13 +3440,15 @@ void mvlinec4_RGBA() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(palookupoffse[0][pix], light0); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(palookupoffse[1][pix], light1); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(palookupoffse[2][pix], light2); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(palookupoffse[3][pix], light3); vplce[3] = place + vince[3]; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = 
vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; dest += dc_pitch; } while (--count); } @@ -3393,48 +3465,70 @@ void mvlinec4_RGBA_SSE() uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + // movemask = !(pix == 0) + __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - BYTE p0 = palookupoffse[0][pix0]; - BYTE p1 = palookupoffse[1][pix1]; - BYTE p2 = palookupoffse[2][pix2]; - BYTE p3 = palookupoffse[3][pix3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); - fg_lo = _mm_srli_epi16(fg_lo, 8); - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + 
local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } // Is this needed? Global variables makes it tricky to know.. vplce[0] = local_vplce[0]; @@ -3503,7 +3597,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); for (--x; x >= x1; --x) { @@ -3528,7 +3622,7 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) clearbufshort (spanend+t2, b2-t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); } else { @@ -3578,6 +3672,7 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; for (; y < y2; ++y) { @@ -3585,7 +3680,37 @@ static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) int x = x1; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * 
inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); dest += dc_pitch; } @@ -3598,10 +3723,41 @@ static void R_DrawFogBoundaryLine_RGBA(int y, int x) uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; do { - dest[x] = shade_pal_index(colormap[dest[x]], light); + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; } while (++x <= x2); } @@ -3627,7 +3783,9 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + 
R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); for (--x; x >= x1; --x) { @@ -3652,11 +3810,12 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) clearbufshort(spanend + t2, b2 - t2, x); } rcolormap = lcolormap; - R_SetColorMapLight(basecolormapdata, (float)light, wallshade); + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); } else { - if (dc_colormap != basecolormapdata) + if (fake_dc_colormap != basecolormapdata) { stop = MIN(t1, b2); while (t2 < stop) @@ -3741,7 +3900,6 @@ fixed_t tmvline1_add_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3749,6 +3907,7 @@ fixed_t tmvline1_add_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3758,7 +3917,7 @@ fixed_t tmvline1_add_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3825,6 +3984,8 @@ void tmvline4_add_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3835,7 +3996,7 @@ void tmvline4_add_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], 
light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3898,7 +4059,6 @@ fixed_t tmvline1_addclamp_RGBA() { DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; int count = dc_count; const BYTE *source = dc_source; uint32_t *dest = (uint32_t*)dc_dest; @@ -3906,6 +4066,7 @@ fixed_t tmvline1_addclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3915,7 +4076,7 @@ fixed_t tmvline1_addclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3981,6 +4142,8 @@ void tmvline4_addclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -3991,7 +4154,7 @@ void tmvline4_addclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4059,6 +4222,7 @@ fixed_t tmvline1_subclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4068,7 +4232,7 @@ 
fixed_t tmvline1_subclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4133,6 +4297,8 @@ void tmvline4_subclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4143,7 +4309,7 @@ void tmvline4_subclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4211,6 +4377,7 @@ fixed_t tmvline1_revsubclamp_RGBA() int pitch = dc_pitch; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4220,7 +4387,7 @@ fixed_t tmvline1_revsubclamp_RGBA() BYTE pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(colormap[pix], light); + uint32_t fg = shade_pal_index(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4285,6 +4452,8 @@ void tmvline4_revsubclamp_RGBA() light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); + ShadeConstants shade_constants = dc_shade_constants; + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -4295,7 +4464,7 @@ void tmvline4_revsubclamp_RGBA() BYTE pix = bufplce[i][vplce[i] >> bits]; if 
(pix != 0) { - uint32_t fg = shade_pal_index(palookupoffse[i][pix], light[i]); + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -4793,15 +4962,15 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; - dc_color = fixedcolormap ? fixedcolormap[APART(color)] : basecolormap->Maps[APART(color)]; - lighttable_t *colormap = (basecolormap = &ShadeFakeColormap[16-alpha])->Maps; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) { - R_SetColorMapLight(colormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); } else { - R_SetColorMapLight(colormap, 0, 0); + R_SetColorMapLight(basecolormap, 0, 0); } return r_columnmethod ? DoDraw1 : DoDraw0; } @@ -4827,7 +4996,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; hcolfunc_pre = R_FillColumnHoriz; - R_SetColorMapLight(identitymap, 0, 0); + R_SetColorMapLight(&identitycolormap, 0, 0); } if (!R_SetBlendFunc (style.BlendOp, fglevel, bglevel, style.Flags)) @@ -4871,30 +5040,77 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) return false; } -void R_SetColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetTranslationMap(lighttable_t *translation) { + dc_fcolormap = nullptr; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; if (r_swtruecolor) { - dc_colormap = basecolormapdata; - dc_light = LIGHTSCALE(light, shade); + dc_colormap = translation; + dc_light = 0; } else { - dc_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_colormap = translation; dc_light = 0; } } -void R_SetDSColorMapLight(BYTE *basecolormapdata, float light, int shade) +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { + dc_fcolormap = base_colormap; + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + 
dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { - ds_colormap = basecolormapdata; + dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + dc_light = 0; + } +} + +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +{ + ds_fcolormap = base_colormap; + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + if (r_swtruecolor) + { + ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { - ds_colormap = basecolormapdata + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); ds_light = 0; } } diff --git a/src/r_draw.h b/src/r_draw.h index 2eefff9bd..cc3b10935 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,11 +25,16 @@ #include "r_defs.h" +struct FColormap; +struct ShadeConstants; + extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; +extern "C" FColormap *dc_fcolormap; +extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; 
extern "C" int dc_yl; @@ -93,7 +98,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(BYTE *colormap); +void R_SetSpanColormap(FDynamicColormap *colormap, int shade); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -321,9 +326,10 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; +extern "C" FColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; -//extern "C" dsfixed_t ds_light; -#define ds_light dc_light +extern "C" ShadeConstants ds_shade_constants; +extern "C" dsfixed_t ds_light; extern "C" dsfixed_t ds_xfrac; extern "C" dsfixed_t ds_yfrac; @@ -341,6 +347,7 @@ extern "C" int ds_color; // [RH] For flat color (no texturing) extern BYTE shadetables[/*NUMCOLORMAPS*16*256*/]; extern FDynamicColormap ShadeFakeColormap[16]; extern BYTE identitymap[256]; +extern FDynamicColormap identitycolormap; extern BYTE *dc_translation; // [RH] Added for muliresolution support @@ -389,9 +396,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(BYTE *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); + +void R_SetTranslationMap(lighttable_t *translation); #endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d390fc54d..ff5c0d82f 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -108,7 +108,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -120,14 +119,14 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light); + *dest = shade_pal_index(*source, light, shade_constants); source += 4; dest += pitch; } @@ -135,8 +134,8 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -145,7 +144,6 @@ void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -157,17 +155,17 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) count++; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); source += 4; dest += pitch; } @@ -175,14 +173,14 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) return; do { - dest[0] = shade_pal_index(colormap[source[0]], light); - dest[1] = shade_pal_index(colormap[source[1]], light); - dest[2] = shade_pal_index(colormap[source[2]], light); - dest[3] = shade_pal_index(colormap[source[3]], light); - dest[pitch] = shade_pal_index(colormap[source[4]], light); - dest[pitch + 1] = shade_pal_index(colormap[source[5]], light); - dest[pitch + 2] = shade_pal_index(colormap[source[6]], light); - dest[pitch + 3] = shade_pal_index(colormap[source[7]], light); + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); source += 8; dest += pitch*2; } while (--count); @@ -191,7 +189,6 @@ void rt_map4cols_RGBA_c (int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -202,82 +199,114 @@ void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) return; count++; + ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - colormap = dc_colormap; dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); - - fg = _mm_packus_epi16(fg_lo, fg_hi); + SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[4]]; - uint32_t p1 = colormap[source[5]]; - uint32_t p2 = colormap[source[6]]; - uint32_t p3 = colormap[source[7]]; + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); - fg = _mm_packus_epi16(fg_lo, fg_hi); - 
_mm_storeu_si128((__m128i*)(dest + pitch), fg); + source += 4; + dest += pitch; } + if (!(count >>= 1)) + return; - source += 8; - dest += pitch * 2; - } while (--count); + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) @@ -385,7 +414,6 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -399,15 +427,15 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -430,7 +458,6 @@ void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -444,9 +471,9 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -454,7 +481,7 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -479,7 +506,6 @@ void rt_add4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -493,7 +519,6 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -501,40 +526,80 @@ void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - 
uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -571,7 +636,7 @@ void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, 
calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -613,7 +678,7 @@ void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - uint32_t fg = shade_pal_index(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -659,7 +724,7 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -694,7 +759,6 @@ void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -708,15 +772,15 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -738,7 +802,6 @@ void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -752,9 +815,9 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -762,7 +825,7 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -786,7 +849,6 @@ void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) #ifndef NO_SSE void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int 
count; @@ -800,7 +862,6 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl * 4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -808,40 +869,80 @@ void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + ShadeConstants shade_constants = dc_shade_constants; - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - fg_hi = _mm_mullo_epi16(fg_hi, mlight); - fg_hi = _mm_srli_epi16(fg_hi, 8); - fg_lo = _mm_mullo_epi16(fg_lo, mlight); - fg_lo = _mm_srli_epi16(fg_lo, 8); + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; - // (fg_red * 
fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - source += 4; - dest += pitch; - } while (--count); + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, 
_mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } } #endif @@ -862,7 +963,6 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -876,15 +976,15 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -906,7 +1006,6 @@ void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -920,9 +1019,9 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -930,7 +1029,7 @@ void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -968,7 +1067,6 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -982,15 +1080,15 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4 + hx]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light); + uint32_t fg = shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1012,7 +1110,6 @@ void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; uint32_t *source; uint32_t *dest; int count; @@ -1026,9 +1123,9 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; source = &dc_temp_rgba[yl*4]; pitch = dc_pitch; - colormap = dc_colormap; uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -1036,7 +1133,7 @@ void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light); + uint32_t fg = shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_main.cpp b/src/r_main.cpp index aaf8fc532..a795f8016 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -119,7 +119,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -lighttable_t *fixedcolormap; +FColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; @@ -464,11 +464,11 @@ void R_SetupColormap(player_t *player) // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the // palette. 
- fixedcolormap = realcolormaps; + fixedcolormap = &realcolormaps; } else { - fixedcolormap = SpecialColormaps[player->fixedcolormap].Colormap; + fixedcolormap = &SpecialColormaps[player->fixedcolormap]; } } else if (player->fixedlightlevel >= 0 && player->fixedlightlevel < NUMCOLORMAPS) @@ -479,7 +479,7 @@ void R_SetupColormap(player_t *player) // [RH] Inverse light for shooting the Sigil if (fixedcolormap == NULL && extralight == INT_MIN) { - fixedcolormap = SpecialColormaps[INVERSECOLORMAP].Colormap; + fixedcolormap = &SpecialColormaps[INVERSECOLORMAP]; extralight = 0; } } diff --git a/src/r_main.h b/src/r_main.h index e8be3c1a3..0db704df1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,25 +90,162 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; + // calculates the light constant passed to the shade_pal_index function inline uint32_t calc_light_multiplier(dsfixed_t light) { return 256 - (light >> (FRACBITS - 8)); } +// Give the compiler a strong hint we want these functions inlined (each variant must keep inline linkage, since the functions are defined in this header): +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) inline +#else +#define FORCEINLINE inline +#endif +#endif + // Calculates a ARGB8 color for the given palette index and light multiplier -inline uint32_t shade_pal_index(uint32_t index, uint32_t light) +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) { const PalEntry &color = GPalette.BaseColors[index]; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light
/ 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels (desaturate, blend toward the fade color by light level, then scale by the light color) +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \
+ \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + extern bool r_swtruecolor; extern double GlobVis; @@ -125,7 +262,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern lighttable_t* fixedcolormap; +extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 9805ab200..26d579d6d 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -227,7 +227,7 @@ void R_MapPlane (int y, int x1) if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap->Maps, GlobVis * fabs(CenterY - y), planeshade); + R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } #ifdef X86_ASM @@ -616,7 +616,7 @@ void R_MapColoredPlane_RGBA(int y, int x1) uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index(ds_color, light); + uint32_t color = shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -1598,7 +1598,7 @@ void R_DrawSkyPlane (visplane_t *pl) else { fakefixed = true; - fixedcolormap = NormalLight.Maps; + fixedcolormap = &NormalLight; R_SetColorMapLight(fixedcolormap, 0, 0); } @@ -1683,7 +1683,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1860,7 +1860,7 @@ void 
R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) @@ -1870,7 +1870,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a } else { - R_SetDSColorMapLight(basecolormap->Maps, 0, 0); + R_SetDSColorMapLight(basecolormap, 0, 0); plane_shade = true; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 43590247e..bd2c7d22b 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -177,7 +177,7 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(basecolormap, rw_light, wallshade); } dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); @@ -313,7 +313,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -630,7 +630,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -1126,6 +1126,11 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (x & 3); ++x) { 
light += rw_lightstep; @@ -1137,7 +1142,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1241,7 +1246,7 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1496,6 +1501,11 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1505,7 +1515,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1605,7 +1615,7 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1690,6 +1700,11 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f palookuplight[3] = 0; } + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) { light += rw_lightstep; @@ -1699,7 +1714,7 
@@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1801,7 +1816,7 @@ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, f if (!fixed) { // calculate lighting - R_SetColorMapLight(basecolormapdata, light, wallshade); + R_SetColorMapLight(basecolormap, light, wallshade); } dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); @@ -1839,7 +1854,7 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); @@ -3238,11 +3253,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -3293,7 +3308,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; @@ -3303,7 +3318,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - 
R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -3318,7 +3333,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, wallshade); + R_SetColorMapLight(usecolormap, rw_light, wallshade); } R_WallSpriteColumn (R_DrawMaskedColumn); dc_x++; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index ee6ac5fed..645741a2a 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -323,7 +323,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - unsigned char *savecolormap = fixedcolormap; + FColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 6f8038148..c132cc2fd 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -416,7 +416,7 @@ void R_DrawVisSprite (vissprite_t *vis) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_colormap, 0, vis->Style.ColormapNum << FRACBITS); + R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -538,11 +538,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap->Maps, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) R_SetColorMapLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap->Maps, 0, 0); + R_SetColorMapLight(usecolormap, 0, 0); else calclighting = true; @@ -593,7 +593,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -604,7 +604,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } rt_initcols(nullptr); for (int zz = 4; zz; --zz) @@ -620,7 +620,7 @@ void R_DrawWallSprite(vissprite_t *spr) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap->Maps, rw_light, shade); + R_SetColorMapLight(usecolormap, rw_light, shade); } if (!R_ClipSpriteColumnWithPortals(spr)) R_WallSpriteColumn(R_DrawMaskedColumn); @@ -680,7 +680,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. 
R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. @@ -1121,19 +1121,19 @@ void R_ProjectSprite (AActor *thing, int fakeside, F3DFloor *fakefloor, F3DFloor } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && ((renderflags & RF_FULLBRIGHT) || (thing->flags5 & MF5_BRIGHT))) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = 0; } else { // diminished light vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; } } } @@ -1208,7 +1208,7 @@ static void R_ProjectWallSprite(AActor *thing, const DVector3 &pos, FTextureID p vis->bWallSprite = true; vis->Style.ColormapNum = GETPALOOKUP( r_SpriteVisibility / MAX(tz, MINZ), spriteshade); - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->wallc = wallc; } @@ -1428,7 +1428,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double if (realfixedcolormap != NULL) { // fixed color - vis->Style.BaseColormap = realfixedcolormap->Colormap; + vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; } else @@ -1439,39 +1439,38 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } if (fixedlightlev >= 0) { - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; 
vis->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && psp->state->GetFullbright()) { // full bright - vis->Style.BaseColormap = mybasecolormap->Maps; // [RH] use basecolormap + vis->Style.BaseColormap = mybasecolormap; // [RH] use basecolormap vis->Style.ColormapNum = 0; } else { // local light - vis->Style.BaseColormap = mybasecolormap->Maps; + vis->Style.BaseColormap = mybasecolormap; vis->Style.ColormapNum = GETPALOOKUP(0, spriteshade); } } if (camera->Inventory != NULL) { BYTE oldcolormapnum = vis->Style.ColormapNum; - lighttable_t *oldcolormap = vis->Style.BaseColormap; + FColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { // The colormap has changed. Is it one we can easily identify? // If not, then don't bother trying to identify it for // hardware accelerated drawing. - if (vis->Style.BaseColormap < SpecialColormaps[0].Colormap || - vis->Style.BaseColormap > SpecialColormaps.Last().Colormap) + if (vis->Style.BaseColormap < &SpecialColormaps[0] || + vis->Style.BaseColormap > &SpecialColormaps.Last()) { noaccel = true; } // Has the basecolormap changed? If so, we can't hardware accelerate it, // since we don't know what it is anymore. - else if (vis->Style.BaseColormap < mybasecolormap->Maps || - vis->Style.BaseColormap >= mybasecolormap->Maps + NUMCOLORMAPS*256) + else if (vis->Style.BaseColormap != mybasecolormap) { noaccel = true; } @@ -1479,13 +1478,13 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } // If we're drawing with a special colormap, but shaders for them are disabled, do // not accelerate. 
- if (!r_shadercolormaps && (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap <= SpecialColormaps.Last().Colormap)) + if (!r_shadercolormaps && (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap <= &SpecialColormaps.Last())) { noaccel = true; } // If drawing with a BOOM colormap, disable acceleration. - if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps) + if (mybasecolormap == &NormalLight && NormalLight.Maps != realcolormaps.Maps) { noaccel = true; } @@ -1502,7 +1501,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double else { colormap_to_use = basecolormap; - vis->Style.BaseColormap = basecolormap->Maps; + vis->Style.BaseColormap = basecolormap; vis->Style.ColormapNum = 0; vis->Style.RenderStyle = STYLE_Normal; } @@ -1649,12 +1648,10 @@ void R_DrawRemainingPlayerSprites() FColormapStyle colormapstyle; bool usecolormapstyle = false; - if (vis->Style.BaseColormap >= SpecialColormaps[0].Colormap && - vis->Style.BaseColormap < SpecialColormaps[SpecialColormaps.Size()].Colormap) + if (vis->Style.BaseColormap >= &SpecialColormaps[0] && + vis->Style.BaseColormap < &SpecialColormaps[SpecialColormaps.Size()]) { - // Yuck! There needs to be a better way to store colormaps in the vissprite... 
:( - ptrdiff_t specialmap = (vis->Style.BaseColormap - SpecialColormaps[0].Colormap) / sizeof(FSpecialColormap) + vis->Style.ColormapNum; - special = &SpecialColormaps[specialmap]; + special = static_cast(vis->Style.BaseColormap); } else if (colormap->Color == PalEntry(255,255,255) && colormap->Desaturate == 0) @@ -1912,7 +1909,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - lighttable_t *colormap = spr->Style.BaseColormap; + FColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2010,18 +2007,18 @@ void R_DrawSprite (vissprite_t *spr) } if (fixedlightlev >= 0) { - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = fixedlightlev >> COLORMAPSHIFT; } else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) { // full bright - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = 0; } else { // diminished light spriteshade = LIGHT2SHADE(sec->lightlevel + r_actualextralight); - spr->Style.BaseColormap = mybasecolormap->Maps; + spr->Style.BaseColormap = mybasecolormap; spr->Style.ColormapNum = GETPALOOKUP(r_SpriteVisibility / MAX(MINZ, (double)spr->depth), spriteshade); } } @@ -2438,7 +2435,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - BYTE* map; + FColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2511,7 +2508,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->ceilingplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::ceiling); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else if 
(fakeside == FAKED_BelowFloor) { @@ -2519,7 +2516,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = §or->floorplane; toppic = heightsec->GetTexture(sector_t::floor); botpic = sector->GetTexture(sector_t::floor); - map = heightsec->ColorMap->Maps; + map = heightsec->ColorMap; } else { @@ -2527,7 +2524,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = &heightsec->floorplane; toppic = heightsec->GetTexture(sector_t::ceiling); botpic = heightsec->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } } else @@ -2536,7 +2533,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, botplane = §or->floorplane; toppic = sector->GetTexture(sector_t::ceiling); botpic = sector->GetTexture(sector_t::floor); - map = sector->ColorMap->Maps; + map = sector->ColorMap; } if (botpic != skyflatnum && particle->Pos.Z < botplane->ZatPoint (particle->Pos)) @@ -2619,7 +2616,7 @@ void R_DrawParticle_C (vissprite_t *vis) { int spacing; BYTE *dest; - BYTE color = vis->Style.BaseColormap[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2685,7 +2682,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) { int spacing; uint32_t *dest; - BYTE color = vis->Style.BaseColormap[vis->startfrac]; + BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; int x1 = vis->x1; @@ -2693,7 +2690,7 @@ void R_DrawParticle_RGBA(vissprite_t *vis) R_DrawMaskedSegsBehindParticle(vis); - uint32_t fg = shade_pal_index(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t 
fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_utility.cpp b/src/r_utility.cpp index 71d3f2376..efc901ca5 100644 --- a/src/r_utility.cpp +++ b/src/r_utility.cpp @@ -889,11 +889,11 @@ void R_SetupFrame (AActor *actor) BaseBlendG = GPART(newblend); BaseBlendB = BPART(newblend); BaseBlendA = APART(newblend) / 255.f; - NormalLight.Maps = realcolormaps; + NormalLight.Maps = realcolormaps.Maps; } else { - NormalLight.Maps = realcolormaps + NUMCOLORMAPS*256*newblend; + NormalLight.Maps = realcolormaps.Maps + NUMCOLORMAPS*256*newblend; BaseBlendR = BaseBlendG = BaseBlendB = 0; BaseBlendA = 0.f; } diff --git a/src/v_draw.cpp b/src/v_draw.cpp index ff0427b34..c2dbf31c5 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -171,14 +171,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetColorMapLight((lighttable_t *)translation, 0, 0); + R_SetTranslationMap((lighttable_t *)translation); } else { - R_SetColorMapLight(identitymap, 0, 0); + R_SetTranslationMap(identitymap); } - fixedcolormap = dc_colormap; + fixedcolormap = dc_fcolormap; ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); BYTE *destorgsave = dc_destorg; @@ -1025,7 +1025,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index(basecolor, calc_light_multiplier(0)); + uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1394,7 +1394,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, // Setup constant texture mapping parameters. R_SetupSpanBits(tex); - R_SetSpanColormap(colormap != NULL ? 
&colormap->Maps[clamp(shade >> FRACBITS, 0, NUMCOLORMAPS-1) * 256] : identitymap); + if (colormap) + R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + else + R_SetSpanColormap(&identitycolormap, 0); R_SetSpanSource(tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; From c058ab9cc9ac17a23a86964387e9f138359935f1 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 5 Jun 2016 17:34:51 +0200 Subject: [PATCH 019/100] Fixed non-standard __m128i usage in SSE_SHADE --- src/r_main.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/r_main.h b/src/r_main.h index 0db704df1..5266fb52c 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -226,8 +226,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ \ __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((intensity_hi.m128i_u16[2] + intensity_hi.m128i_u16[1] + intensity_hi.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((intensity_hi.m128i_u16[6] + intensity_hi.m128i_u16[5] + intensity_hi.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ \ fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ @@ -235,8 +235,8 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, 
uint32_t light, const Shade fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ \ __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((intensity_lo.m128i_u16[2] + intensity_lo.m128i_u16[1] + intensity_lo.m128i_u16[0]) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((intensity_lo.m128i_u16[6] + intensity_lo.m128i_u16[5] + intensity_lo.m128i_u16[4]) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ \ fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ From c176d38b7e8992f006a99950a249848ba0a0f039 Mon Sep 17 00:00:00 2001 From: "alexey.lysiuk" Date: Sun, 5 Jun 2016 19:41:08 +0300 Subject: [PATCH 020/100] Fixed compilation with Clang --- src/r_draw.cpp | 4 ++-- src/r_main.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2e21c7038..e809342e9 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -5076,7 +5076,7 @@ void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) dc_shade_constants.fade_green = dc_fcolormap->Fade.g; dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(std::abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 
0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { @@ -5101,7 +5101,7 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_shade_constants.fade_green = ds_fcolormap->Fade.g; ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(std::abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { diff --git a/src/r_main.h b/src/r_main.h index 5266fb52c..5d4ff1174 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -115,7 +115,7 @@ inline uint32_t calc_light_multiplier(dsfixed_t light) #if defined(_MSC_VER) #define FORCEINLINE __forceinline #elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) +#define FORCEINLINE __attribute__((always_inline)) inline #else #define FORCEINLINE inline #endif From c5fcfb664f210996eadc22e20c036b50b4064abb Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 00:55:52 +0200 Subject: [PATCH 021/100] Changed rgba renderer to use a command queue system for its drawers --- src/CMakeLists.txt | 1 + src/r_draw.cpp | 2273 +-------------------------- src/r_draw.h | 95 +- src/r_draw_rgba.cpp | 3492 ++++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 8 + src/r_drawt_rgba.cpp | 2633 ++++++++++++++++++------------- src/r_main.cpp | 2 + src/r_swrenderer.cpp | 1 + src/r_things.cpp | 5 +- 9 files changed, 5189 insertions(+), 3321 deletions(-) create mode 100644 src/r_draw_rgba.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c90756b5d..75cf27cad 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -882,6 +882,7 @@ set( FASTMATH_PCH_SOURCES r_3dfloors.cpp r_bsp.cpp r_draw.cpp + 
r_draw_rgba.cpp r_drawt.cpp r_drawt_rgba.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index e809342e9..984a74f3f 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -43,9 +43,6 @@ #include "gi.h" #include "stats.h" #include "x86.h" -#ifndef NO_SSE -#include -#endif #undef RANGECHECK @@ -135,6 +132,7 @@ void (*rt_tlateaddclamp4cols)(int sx, int yl, int yh); void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); +void (*rt_span_coverage)(int x, int start, int stop); // // R_DrawColumn @@ -287,51 +285,6 @@ void R_DrawColumnP_C (void) } #endif -void R_DrawColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - BYTE *colormap = dc_colormap; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); - } -} - // [RH] Just fills a column with a color void R_FillColumnP_C (void) { @@ -357,32 +310,6 @@ void R_FillColumnP_C (void) } } -void R_FillColumnP_RGBA() -{ - int count; - uint32_t* dest; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - uint32_t light = calc_light_multiplier(dc_light); - - { - int pitch = dc_pitch; - BYTE color = dc_color; - - do - { - *dest = shade_pal_index_simple(color, light); - dest += pitch; - } while (--count); - } -} - void R_FillAddColumn_C (void) { int count; @@ -410,38 +337,6 @@ void R_FillAddColumn_C (void) } while (--count); } -void R_FillAddColumn_RGBA_C() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillAddClampColumn_C (void) { int count; @@ -475,38 +370,6 @@ void R_FillAddClampColumn_C (void) } while (--count); } -void R_FillAddClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - 
do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillSubClampColumn_C (void) { int count; @@ -539,38 +402,6 @@ void R_FillSubClampColumn_C (void) } while (--count); } -void R_FillSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - void R_FillRevSubClampColumn_C (void) { int count; @@ -603,42 +434,9 @@ void R_FillRevSubClampColumn_C (void) } while (--count); } -void R_FillRevSubClampColumn_RGBA() -{ - int count; - uint32_t *dest; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = 
clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); -} - // // Spectre/Invisibility. // -#define FUZZTABLE 50 extern "C" { @@ -754,105 +552,6 @@ void R_DrawFuzzColumnP_C (void) } #endif -void R_DrawFuzzColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - - // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; - - // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - count = dc_yh - dc_yl; - - // Zero length. - if (count < 0) - return; - - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; - - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. - - { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch; - int fuzz = fuzzpos; - int cnt; - - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. 
- if (fuzz) - { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; - } -} - // // R_DrawTranlucentColumn // @@ -937,56 +636,6 @@ void R_DrawAddColumnP_C (void) } } -void R_DrawAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - BYTE *colormap = dc_colormap; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // // R_DrawTranslatedColumn // Used to draw player sprites with the green colorramp mapped to others. @@ -1027,40 +676,6 @@ void R_DrawTranslatedColumnP_C (void) } } -void R_DrawTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column that is both translated and translucent void R_DrawTlatedAddColumnP_C() { @@ -1101,56 +716,6 @@ void R_DrawTlatedAddColumnP_C() } } -void R_DrawTlatedAddColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - 
- dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Draw a column whose "color" values are actually translucency // levels for a base color stored in dc_color. 
void R_DrawShadedColumnP_C (void) @@ -1188,52 +753,6 @@ void R_DrawShadedColumnP_C (void) } } -void R_DrawShadedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = dc_count; - - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add source to destination, clamping it to white void R_DrawAddClampColumnP_C () { @@ -1275,53 +794,6 @@ void R_DrawAddClampColumnP_C () } } -void R_DrawAddClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 
0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Add translated source to destination, clamping it to white void R_DrawAddClampTranslatedColumnP_C () { @@ -1364,54 +836,6 @@ void R_DrawAddClampTranslatedColumnP_C () } } -void R_DrawAddClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // 
Subtract destination from source, clamping it to black void R_DrawSubClampColumnP_C () { @@ -1452,53 +876,6 @@ void R_DrawSubClampColumnP_C () } } -void R_DrawSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract destination from source, clamping it to black void R_DrawSubClampTranslatedColumnP_C () { @@ -1540,54 +917,6 @@ void R_DrawSubClampTranslatedColumnP_C () } } -void R_DrawSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = 
calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampColumnP_C () { @@ -1628,52 +957,6 @@ void R_DrawRevSubClampColumnP_C () } } -void R_DrawRevSubClampColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - 
uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - // Subtract source from destination, clamping it to black void R_DrawRevSubClampTranslatedColumnP_C () { @@ -1715,55 +998,6 @@ void R_DrawRevSubClampTranslatedColumnP_C () } } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() -{ - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = dc_count; - if (count <= 0) - return; - - dest = (uint32_t*)dc_dest; - - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } -} - - // // R_DrawSpan // 
With DOOM style restrictions on view orientation, @@ -1957,233 +1191,6 @@ void R_DrawSpanP_C (void) } #endif -void R_DrawSpanP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - -#ifndef NO_SSE -void R_DrawSpanP_RGBA_SSE() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - -#ifdef RANGECHECK - if (ds_x2 < ds_x1 || ds_x1 < 0 - || ds_x2 >= screen->width || ds_y > screen->height) - { - I_Error("R_DrawSpan: %i to %i at %i", ds_x1, ds_x2, ds_y); - } - // dscount++; -#endif - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. 
- dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} -#endif - #ifndef X86_ASM // [RH] Draw a span with holes @@ -2250,72 +1257,6 @@ void R_DrawSpanMaskedP_C (void) } #endif -void R_DrawSpanMaskedP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - const BYTE* colormap = ds_colormap; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - *dest = shade_pal_index(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - - void R_DrawSpanTranslucentP_C (void) { dsfixed_t xfrac; @@ -2378,89 +1319,6 @@ void R_DrawSpanTranslucentP_C (void) } } -void R_DrawSpanTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanMaskedTranslucentP_C (void) { dsfixed_t xfrac; @@ -2537,99 +1395,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) } } -void 
R_DrawSpanMaskedTranslucentP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 
0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - void R_DrawSpanAddClampP_C (void) { dsfixed_t xfrac; @@ -2700,88 +1465,6 @@ void R_DrawSpanAddClampP_C (void) } } -void R_DrawSpanAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. 
- do - { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} void R_DrawSpanMaskedAddClampP_C (void) { @@ -2865,114 +1548,12 @@ void R_DrawSpanMaskedAddClampP_C (void) } } -void R_DrawSpanMaskedAddClampP_RGBA_C() -{ - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const BYTE* source = ds_source; - int count; - int spot; - - uint32_t light = 
calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - xfrac = ds_xfrac; - yfrac = ds_yfrac; - - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - - count = ds_x2 - ds_x1 + 1; - - xstep = ds_xstep; - ystep = ds_ystep; - - if (ds_xbits == 6 && ds_ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - BYTE texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; - do - { - BYTE texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * 
fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); - } -} - // [RH] Just fill a span with a color void R_FillSpan_C (void) { memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); } -void R_FillSpan_RGBA() -{ - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; -} // Draw a voxel slab // @@ -3070,8 +1651,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v // wallscan stuff, in C -static int vlinebits; -static int mvlinebits; +int vlinebits; +int mvlinebits; #ifndef X86_ASM static DWORD vlinec1 (); @@ -3186,29 +1767,6 @@ DWORD vlinec1 () } #endif -DWORD vlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = vlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void vlinec4 () { @@ -3228,113 +1786,6 @@ void vlinec4 () } #endif -void vlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - dest[0] = 
shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void vlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. - vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - void setupmvline (int fracbits) { if (!r_swtruecolor) @@ -3380,34 +1831,6 @@ DWORD mvlinec1 () } #endif -DWORD mvlinec1_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = mvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_pal_index(pix, light, shade_constants); - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - #if !defined(X86_ASM) void mvlinec4 () { @@ -3428,121 +1851,6 @@ void mvlinec4 () } #endif -void mvlinec4_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = 
calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - do - { - BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; - } while (--count); -} - -#ifndef NO_SSE -void mvlinec4_RGBA_SSE() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = vlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = 
_mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; - } while (--count); - } - - // Is this needed? Global variables makes it tricky to know.. 
- vplce[0] = local_vplce[0]; - vplce[1] = local_vplce[1]; - vplce[2] = local_vplce[2]; - vplce[3] = local_vplce[3]; - vince[0] = local_vince[0]; - vince[1] = local_vince[1]; - vince[2] = local_vince[2]; - vince[3] = local_vince[3]; -} -#endif - - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; @@ -3666,196 +1974,6 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } } -static void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) -{ - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - for (; y < y2; ++y) - { - int x2 = spanend[y]; - int x = x1; - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); - dest += dc_pitch; - } -} - -static void R_DrawFogBoundaryLine_RGBA(int y, int x) -{ - int x2 = spanend[y]; - BYTE *colormap = dc_colormap; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; - - uint32_t 
light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; - - do - { - uint32_t red = (dest[x] >> 16) & 0xff; - uint32_t green = (dest[x] >> 8) & 0xff; - uint32_t blue = dest[x] & 0xff; - - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - - dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; - } while (++x <= x2); -} - -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - R_DrawFogBoundaryLine_RGBA(t2++, xr); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - R_DrawFogBoundaryLine_RGBA(--b2, xr); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); - } -} - - int tmvlinebits; void setuptmvline (int bits) @@ -3896,49 +2014,6 @@ fixed_t tmvline1_add_C () return frac; } -fixed_t tmvline1_add_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = 
dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_add_C () { BYTE *dest = dc_dest; @@ -3972,51 +2047,6 @@ void tmvline4_add_C () } while (--count); } -void tmvline4_add_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - 
uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_addclamp_C () { DWORD fracstep = dc_iscale; @@ -4055,49 +2085,6 @@ fixed_t tmvline1_addclamp_C () return frac; } -fixed_t tmvline1_addclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_addclamp_C () { BYTE *dest = dc_dest; @@ -4130,51 +2117,6 @@ void tmvline4_addclamp_C () } 
while (--count); } -void tmvline4_addclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_subclamp_C () { DWORD fracstep = dc_iscale; @@ -4210,50 +2152,6 @@ fixed_t tmvline1_subclamp_C () return frac; } -fixed_t tmvline1_subclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = 
source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_subclamp_C () { BYTE *dest = dc_dest; @@ -4285,51 +2183,6 @@ void tmvline4_subclamp_C () } while (--count); } -void tmvline4_subclamp_RGBA() -{ - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = 
clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - fixed_t tmvline1_revsubclamp_C () { DWORD fracstep = dc_iscale; @@ -4365,50 +2218,6 @@ fixed_t tmvline1_revsubclamp_C () return frac; } -fixed_t tmvline1_revsubclamp_RGBA() -{ - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - BYTE *colormap = dc_colormap; - int count = dc_count; - const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; - int bits = tmvlinebits; - int pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - BYTE pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - frac += fracstep; - dest += pitch; - } while (--count); - - return frac; -} - void tmvline4_revsubclamp_C () { BYTE *dest = dc_dest; @@ -4440,52 +2249,6 @@ void tmvline4_revsubclamp_C () } while (--count); } -void tmvline4_revsubclamp_RGBA() -{ - uint32_t *dest = 
(uint32_t*)dc_dest; - int count = dc_count; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do - { - for (int i = 0; i < 4; ++i) - { - BYTE pix = bufplce[i][vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - vplce[i] += vince[i]; - } - dest += dc_pitch; - } while (--count); -} - - //========================================================================== // // R_GetColumn @@ -4535,11 +2298,7 @@ void R_InitColumnDrawers () R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; -#ifndef NO_SSE - R_DrawSpan = R_DrawSpanP_RGBA_SSE; -#else - R_DrawSpan = R_DrawSpanP_RGBA_C; -#endif + R_DrawSpan = R_DrawSpanP_RGBA_C; R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; @@ -4579,9 +2338,13 @@ void R_InitColumnDrawers () rt_copy1col = rt_copy1col_RGBA_c; 
rt_copy4cols = rt_copy4cols_RGBA_c; rt_map1col = rt_map1col_RGBA_c; + rt_map4cols = rt_map4cols_RGBA_c; rt_shaded1col = rt_shaded1col_RGBA_c; + rt_shaded4cols = rt_shaded4cols_RGBA_c; rt_add1col = rt_add1col_RGBA_c; + rt_add4cols = rt_add4cols_RGBA_c; rt_addclamp1col = rt_addclamp1col_RGBA_c; + rt_addclamp4cols = rt_addclamp4cols_RGBA_c; rt_subclamp1col = rt_subclamp1col_RGBA_c; rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; rt_tlate1col = rt_tlate1col_RGBA_c; @@ -4597,31 +2360,14 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; rt_initcols = rt_initcols_rgba; - -#ifndef NO_SSE - rt_map4cols = rt_map4cols_RGBA_SSE; - rt_add4cols = rt_add4cols_RGBA_SSE; - rt_addclamp4cols = rt_addclamp4cols_RGBA_SSE; - rt_shaded4cols = rt_shaded4cols_RGBA_SSE; -#else - rt_map4cols = rt_map4cols_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; -#endif + rt_span_coverage = rt_span_coverage_rgba; dovline1 = vlinec1_RGBA; doprevline1 = vlinec1_RGBA; domvline1 = mvlinec1_RGBA; -#ifndef NO_SSE - dovline4 = vlinec4_RGBA_SSE; - domvline4 = mvlinec4_RGBA_SSE; -#else dovline4 = vlinec4_RGBA; domvline4 = mvlinec4_RGBA; -#endif - } else { @@ -4719,6 +2465,7 @@ void R_InitColumnDrawers () rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_c; rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_c; rt_initcols = rt_initcols_pal; + rt_span_coverage = rt_span_coverage_pal; if (pointers_saved) { diff --git a/src/r_draw.h b/src/r_draw.h index cc3b10935..98be57c51 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,6 +24,13 @@ #define __R_DRAW__ #include "r_defs.h" +#include + +// Spectre/Invisibility. 
+#define FUZZTABLE 50 +extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine +extern "C" int fuzzpos; +extern "C" int fuzzviewheight; struct FColormap; struct ShadeConstants; @@ -173,7 +180,6 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh); void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); -void rt_shaded4cols_RGBA_SSE (int sx, int yl, int yh); void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); @@ -188,11 +194,8 @@ void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_map4cols_RGBA_SSE (int sx, int yl, int yh); void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_SSE (int sx, int yl, int yh); void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_SSE (int sx, int yl, int yh); void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); @@ -235,6 +238,7 @@ extern void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); extern void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); extern void (*rt_initcols)(BYTE *buffer); +extern void (*rt_span_coverage)(int x, int start, int stop); void rt_draw4cols (int sx); @@ -242,6 +246,8 @@ void rt_draw4cols (int sx); void rt_initcols_pal (BYTE *buffer); void rt_initcols_rgba (BYTE *buffer); +void rt_span_coverage_pal(int x, int start, int stop); +void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -277,9 +283,40 @@ void R_DrawFuzzColumnP_RGBA_C (void); void R_DrawTranslatedColumnP_RGBA_C (void); void R_DrawShadedColumnP_RGBA_C (void); void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanP_RGBA_SSE (void); void R_DrawSpanMaskedP_RGBA_C (void); +void 
R_DrawSpanTranslucentP_RGBA_C(); +void R_DrawSpanMaskedTranslucentP_RGBA_C(); +void R_DrawSpanAddClampP_RGBA_C(); +void R_DrawSpanMaskedAddClampP_RGBA_C(); +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA_C(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA_C(); +void R_DrawTlatedAddColumnP_RGBA_C(); +void R_DrawAddClampColumnP_RGBA_C(); +void R_DrawAddClampTranslatedColumnP_RGBA_C(); +void R_DrawSubClampColumnP_RGBA_C(); +void R_DrawSubClampTranslatedColumnP_RGBA_C(); +void R_DrawRevSubClampColumnP_RGBA_C(); +void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); +void R_FillSpan_RGBA(); +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); + void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -403,4 +440,52 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Wait until all drawers finished executing +void R_FinishDrawerCommands(); + +class DrawerThread +{ +public: + int core = 0; + int num_cores = 1; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; +}; + +class DrawerCommand +{ +public: + virtual void Execute(DrawerThread *thread) = 0; +}; + +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + static DrawerCommandQueue *Instance(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); 
+ + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + void *ptr = AllocMemory(sizeof(T)); + T *command = new (ptr)T(std::forward(args)...); + if (!command) + return; + Instance()->commands.push_back(command); + } + + // Wait until all worker threads finished executing commands + static void Finish(); +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp new file mode 100644 index 000000000..9e61bb427 --- /dev/null +++ b/src/r_draw_rgba.cpp @@ -0,0 +1,3492 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// True color span/column drawing functions. 
+// +//----------------------------------------------------------------------------- + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" +#ifndef NO_SSE +#include +#endif +#include + +extern int vlinebits; +extern int mvlinebits; +extern int tmvlinebits; + +extern "C" short spanend[MAXHEIGHT]; +extern float rw_light; +extern float rw_lightstep; +extern int wallshade; + +///////////////////////////////////////////////////////////////////////////// + +DrawerCommandQueue *DrawerCommandQueue::Instance() +{ + static DrawerCommandQueue queue; + return &queue; +} + +void* DrawerCommandQueue::AllocMemory(size_t size) +{ + // Make sure allocations remain 16-byte aligned + size = (size + 15) / 16 * 16; + + auto queue = Instance(); + if (queue->memorypool_pos + size > memorypool_size) + return nullptr; + + void *data = queue->memorypool + queue->memorypool_pos; + queue->memorypool_pos += size; + return data; +} + +void DrawerCommandQueue::Finish() +{ + auto queue = Instance(); + + DrawerThread thread; + + size_t size = queue->commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->commands[i]; + command->Execute(&thread); + } + + for (auto &command : queue->commands) + command->~DrawerCommand(); + queue->commands.clear(); + queue->memorypool_pos = 0; +} + +///////////////////////////////////////////////////////////////////////////// + +class DrawColumnRGBACommand : public DrawerCommand +{ + int dc_count; + BYTE *dc_dest; + fixed_t dc_texturefrac; + fixed_t dc_iscale; + fixed_t dc_light; + const BYTE *dc_source; + int dc_pitch; + ShadeConstants dc_shade_constants; + +public: + DrawColumnRGBACommand() + { + 
dc_count = ::dc_count; + dc_dest = ::dc_dest; + dc_texturefrac = ::dc_texturefrac; + dc_iscale = ::dc_iscale; + dc_light = ::dc_light; + dc_source = ::dc_source; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int count; + uint32_t* dest; + fixed_t frac; + fixed_t fracstep; + + count = dc_count; + + // Zero length, column does not exceed a pixel. + if (count <= 0) + return; + + // Framebuffer destination address. + dest = (uint32_t*)dc_dest; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + // Determine scaling, + // which is the only mapping to be done. + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch; + + // Inner loop that does the actual texture mapping, + // e.g. a DDA-lile scaling. + // This is as fast as it gets. 
+			do
+			{
+				*dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+
+				dest += pitch;
+				frac += fracstep;
+
+			} while (--count);
+		}
+	}
+};
+
+// Fills dc_count pixels, stepping dc_pitch between them, with palette entry
+// dc_color shaded by dc_light. All inputs are copied from the dc_* globals
+// when the command is constructed.
+class FillColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_light;
+	int dc_pitch;
+	int dc_color;
+
+public:
+	FillColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_light = ::dc_light;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t* dest;
+
+		count = dc_count;
+
+		// The do/while below always runs once, so bail out on empty runs.
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+
+		{
+			int pitch = dc_pitch;
+			BYTE color = dc_color;
+
+			do
+			{
+				*dest = shade_pal_index_simple(color, light);
+				dest += pitch;
+			} while (--count);
+		}
+	}
+};
+
+// Fills a column with the rounded average of the shaded dc_color and the
+// pixel already in the destination: (fg + bg + 1) / 2 per channel.
+class FillAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	fixed_t dc_light;
+	int dc_color;
+
+public:
+	FillAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Unpack the foreground with the same 0xAARRGGBB layout used to read
+		// and write *dest below: red is bits 16-23, green is bits 8-15.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			uint32_t red = (fg_red + bg_red + 1) / 2;
+			uint32_t green = (fg_green + bg_green + 1) / 2;
+			uint32_t blue = (fg_blue + bg_blue + 1) / 2;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Fills a column with min(fg + bg, 255) per channel, where fg is the shaded
+// dc_color and bg is the pixel already in the destination.
+class FillAddClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	fixed_t dc_light;
+	int dc_color;
+
+public:
+	FillAddClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_color = ::dc_color;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Red is bits 16-23 and green is bits 8-15, matching *dest below.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Explicit template argument: the operands mix uint32_t with int literals.
+			uint32_t red = clamp<uint32_t>(fg_red + bg_red, 0, 255);
+			uint32_t green = clamp<uint32_t>(fg_green + bg_green, 0, 255);
+			uint32_t blue = clamp<uint32_t>(fg_blue + bg_blue, 0, 255);
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Fills a column with max(bg - fg, 0) per channel, where fg is the shaded
+// dc_color and bg is the pixel already in the destination.
+class FillSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	int dc_color;
+	fixed_t dc_light;
+
+public:
+	FillSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+		dc_light = ::dc_light;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Red is bits 16-23 and green is bits 8-15, matching *dest below.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// The difference is biased by 256 so the unsigned math cannot wrap;
+			// after clamping to [256, 511] the same 256 is removed to get 0..255.
+			uint32_t red = clamp<uint32_t>(256 - fg_red + bg_red, 256, 256 + 255) - 256;
+			uint32_t green = clamp<uint32_t>(256 - fg_green + bg_green, 256, 256 + 255) - 256;
+			uint32_t blue = clamp<uint32_t>(256 - fg_blue + bg_blue, 256, 256 + 255) - 256;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+// Fills a column with max(fg - bg, 0) per channel, where fg is the shaded
+// dc_color and bg is the pixel already in the destination.
+class FillRevSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	int dc_pitch;
+	int dc_color;
+	fixed_t dc_light;
+
+public:
+	FillRevSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_pitch = ::dc_pitch;
+		dc_color = ::dc_color;
+		dc_light = ::dc_light;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+		int pitch = dc_pitch;
+
+		// Red is bits 16-23 and green is bits 8-15, matching *dest below.
+		uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		do
+		{
+			uint32_t bg_red = (*dest >> 16) & 0xff;
+			uint32_t bg_green = (*dest >> 8) & 0xff;
+			uint32_t bg_blue = (*dest) & 0xff;
+
+			// Biased by 256 to keep the unsigned math from wrapping; the bias
+			// is removed again after clamping to [256, 511].
+			uint32_t red = clamp<uint32_t>(256 + fg_red - bg_red, 256, 256 + 255) - 256;
+			uint32_t green = clamp<uint32_t>(256 + fg_green - bg_green, 256, 256 + 255) - 256;
+			uint32_t blue = clamp<uint32_t>(256 + fg_blue - bg_blue, 256, 256 + 255) - 256;
+
+			*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+			dest += pitch;
+		} while (--count);
+	}
+};
+
+class DrawFuzzColumnRGBACommand : public DrawerCommand
+{
+	int dc_x;
+	int dc_yl;
+	int dc_yh;
+	BYTE *dc_destorg;
+	int dc_pitch;
+	int fuzzpos;
+	int fuzzviewheight;
+
+public:
+	DrawFuzzColumnRGBACommand()
+	{
+		dc_x = ::dc_x;
+		dc_yl = ::dc_yl;
+		dc_yh = ::dc_yh;
+		dc_destorg = ::dc_destorg;
+		dc_pitch = ::dc_pitch;
+		fuzzpos = ::fuzzpos;
+		fuzzviewheight = ::fuzzviewheight;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+
+		// Adjust borders. Low...
+		if (dc_yl == 0)
+			dc_yl = 1;
+
+		// .. and high.
+		if (dc_yh > fuzzviewheight)
+			dc_yh = fuzzviewheight;
+
+		count = dc_yh - dc_yl;
+
+		// Zero length.
+		if (count < 0)
+			return;
+
+		count++;
+
+		dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg;
+
+		// Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black)
+		// I'm not sure if this is really always the case or not.
+
+		{
+			// [RH] Make local copies of global vars to try and improve
+			// the optimizations made by the compiler.
+			int pitch = dc_pitch;
+			int fuzz = fuzzpos;
+			int cnt;
+
+			// [RH] Split this into three separate loops to minimize
+			// the number of times fuzzpos needs to be clamped.
+			if (fuzz)
+			{
+				cnt = MIN(FUZZTABLE - fuzz, count);
+				count -= cnt;
+				do
+				{
+					uint32_t bg = dest[fuzzoffset[fuzz++]];
+					uint32_t bg_red = (bg >> 16) & 0xff;
+					uint32_t bg_green = (bg >> 8) & 0xff;
+					uint32_t bg_blue = (bg) & 0xff;
+
+					uint32_t red = bg_red * 3 / 4;
+					uint32_t green = bg_green * 3 / 4;
+					uint32_t blue = bg_blue * 3 / 4;
+
+					*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+					dest += pitch;
+				} while (--cnt);
+			}
+			if (fuzz == FUZZTABLE || count > 0)
+			{
+				while (count >= FUZZTABLE)
+				{
+					fuzz = 0;
+					cnt = FUZZTABLE;
+					count -= FUZZTABLE;
+					do
+					{
+						uint32_t bg = dest[fuzzoffset[fuzz++]];
+						uint32_t bg_red = (bg >> 16) & 0xff;
+						uint32_t bg_green = (bg >> 8) & 0xff;
+						uint32_t bg_blue = (bg) & 0xff;
+
+						uint32_t red = bg_red * 3 / 4;
+						uint32_t green = bg_green * 3 / 4;
+						uint32_t blue = bg_blue * 3 / 4;
+
+						*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+						dest += pitch;
+					} while (--cnt);
+				}
+				fuzz = 0;
+				if (count > 0)
+				{
+					do
+					{
+						uint32_t bg = dest[fuzzoffset[fuzz++]];
+						uint32_t bg_red = (bg >> 16) & 0xff;
+						uint32_t bg_green = (bg >> 8) & 0xff;
+						uint32_t bg_blue = (bg) & 0xff;
+
+						uint32_t red = bg_red * 3 / 4;
+						uint32_t green = bg_green * 3 / 4;
+						uint32_t blue = bg_blue * 3 / 4;
+
+						*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+						dest += pitch;
+					} while (--count);
+				}
+			}
+			// NOTE(review): this stores into the command's own fuzzpos copy, not
+			// ::fuzzpos, so the global is not advanced here. Confirm that the
+			// code queuing this command advances it.
+			fuzzpos = fuzz;
+		}
+	}
+};
+
+// Draws a textured column blended with the destination:
+// (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+// All inputs are copied from the dc_* globals at construction.
+class DrawAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Explicit template argument: the operands mix uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws a textured column, remapping every texel through dc_translation
+// before it is shaded.
+class DrawTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+
+public:
+	DrawTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t* dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			// [RH] Local copies of global vars to improve compiler optimizations
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+
+			do
+			{
+				*dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Translated variant of the additive column drawer: texels go through
+// dc_translation, are shaded, and are then blended with the destination as
+// (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+class DrawTlatedAddColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawTlatedAddColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		// Capture the blend factors with the rest of the state, so Execute()
+		// does not read whatever the globals hold when it runs.
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		uint32_t light = calc_light_multiplier(dc_light);
+		ShadeConstants shade_constants = dc_shade_constants;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Explicit template argument: the operands mix uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawShadedColumnRGBACommand : public DrawerCommand
+{
+private:
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	fixed_t dc_light;
+	const BYTE *dc_source;
+	lighttable_t *dc_colormap;
+	int dc_color;
+	int dc_pitch;
+
+public:
+	DrawShadedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_light = ::dc_light;
+		dc_source = ::dc_source;
+		dc_colormap = ::dc_colormap;
+		dc_color = ::dc_color;
+		dc_pitch = ::dc_pitch;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac, fracstep;
+
+		count = dc_count;
+
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		uint32_t fg = shade_pal_index_simple(dc_color,
calc_light_multiplier(dc_light));
+		uint32_t fg_red = (fg >> 16) & 0xff;
+		uint32_t fg_green = (fg >> 8) & 0xff;
+		uint32_t fg_blue = fg & 0xff;
+
+		{
+			const BYTE *source = dc_source;
+			BYTE *colormap = dc_colormap;
+			int pitch = dc_pitch;
+
+			do
+			{
+				// The colormapped texel is used as a 0..64 coverage value for the
+				// solid colour. Explicit template argument: a BYTE operand is
+				// mixed with int literals.
+				DWORD alpha = clamp<DWORD>(colormap[source[frac >> FRACBITS]], 0, 64);
+				DWORD inv_alpha = 64 - alpha;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64;
+				uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64;
+				uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws a textured column blended with the destination:
+// (fg * srcalpha + bg * destalpha) / 256 per channel, clamped to 255.
+// All inputs are copied from the dc_* globals at construction.
+class DrawAddClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Explicit template argument: the operands mix uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Same blend as DrawAddClampColumnRGBACommand, with every texel remapped
+// through dc_translation before it is shaded.
+class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	BYTE *dc_translation;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawAddClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_translation = ::dc_translation;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Explicit template argument: the operands mix uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255);
+				uint32_t green = clamp<uint32_t>((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255);
+				uint32_t blue = clamp<uint32_t>((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255);
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+
uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// bg * destalpha - fg * srcalpha, biased by 0x10000 so the unsigned
+				// math cannot wrap; the bias (256 after the divide) is removed once
+				// the value is clamped. Explicit template argument: the operands
+				// mix uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t green = clamp<uint32_t>((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t blue = clamp<uint32_t>((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Subtractive column drawer with translation: texels go through
+// dc_translation, are shaded, and the result written is
+// (bg * destalpha - fg * srcalpha) / 256 per channel, clamped at 0.
+class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+	BYTE *dc_translation;
+
+public:
+	DrawSubClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+		dc_translation = ::dc_translation;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Biased by 0x10000 to keep the unsigned math from wrapping; see
+				// the explicit template argument for the mixed operand types.
+				uint32_t red = clamp<uint32_t>((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t green = clamp<uint32_t>((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t blue = clamp<uint32_t>((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Reverse-subtractive column drawer: writes
+// (fg * srcalpha - bg * destalpha) / 256 per channel, clamped at 0.
+class DrawRevSubClampColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawRevSubClampColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+			// Reduce the fixed-point alphas to 8 fractional bits.
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// Biased by 0x10000 to keep the unsigned math from wrapping; the
+				// explicit template argument covers the mixed operand types.
+				uint32_t red = clamp<uint32_t>((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t green = clamp<uint32_t>((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t blue = clamp<uint32_t>((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand
+{
+	int dc_count;
+	BYTE *dc_dest;
+	fixed_t dc_iscale;
+	fixed_t dc_texturefrac;
+	const BYTE *dc_source;
+	int dc_pitch;
+	fixed_t dc_light;
+	ShadeConstants dc_shade_constants;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+	BYTE *dc_translation;
+
+public:
+	DrawRevSubClampTranslatedColumnRGBACommand()
+	{
+		dc_count = ::dc_count;
+		dc_dest = ::dc_dest;
+		dc_iscale = ::dc_iscale;
+		dc_texturefrac = ::dc_texturefrac;
+		dc_source = ::dc_source;
+		dc_pitch = ::dc_pitch;
+		dc_light = ::dc_light;
+		dc_shade_constants = ::dc_shade_constants;
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+		dc_translation = ::dc_translation;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		int count;
+		uint32_t *dest;
+		fixed_t frac;
+		fixed_t fracstep;
+
+		count = dc_count;
+		if (count <= 0)
+			return;
+
+		dest = (uint32_t*)dc_dest;
+
+		fracstep = dc_iscale;
+		frac = dc_texturefrac;
+
+		{
+			BYTE *translation = dc_translation;
+			const BYTE *source = dc_source;
+			int pitch = dc_pitch;
+			uint32_t light = calc_light_multiplier(dc_light);
+			ShadeConstants shade_constants = dc_shade_constants;
+
+			uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+			uint32_t bg_alpha = dc_destalpha
>> (FRACBITS - 8);
+
+			do
+			{
+				uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = fg & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				// fg * srcalpha - bg * destalpha, biased by 0x10000 so the unsigned
+				// math cannot wrap. Explicit template argument: the operands mix
+				// uint32_t with int literals.
+				uint32_t red = clamp<uint32_t>((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t green = clamp<uint32_t>((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256;
+				uint32_t blue = clamp<uint32_t>((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256;
+
+				*dest = 0xff000000 | (red << 16) | (green << 8) | blue;
+				dest += pitch;
+				frac += fracstep;
+			} while (--count);
+		}
+	}
+};
+
+// Draws one horizontal span of a flat texture from ds_x1 to ds_x2 on row
+// ds_y, stepping the texture coordinates by ds_xstep/ds_ystep per pixel.
+// Unless NO_SSE is defined, the 64x64 case shades four pixels at a time.
+class DrawSpanRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_x1;
+	int ds_x2;
+	int ds_y;
+	int ds_xbits;
+	int ds_ybits;
+	BYTE *dc_destorg;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+
+public:
+	DrawSpanRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		dc_destorg = ::dc_destorg;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+	}
+
+#ifdef NO_SSE
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+#else
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+
+			uint32_t *palette = (uint32_t*)GPalette.BaseColors;
+
+			// Groups of four pixels go through the SSE loops; the remaining
+			// 0-3 pixels are handled by the scalar loop at the end.
+			int sse_count = count / 4;
+			count -= sse_count * 4;
+
+			if (shade_constants.simple_shade)
+			{
+				SSE_SHADE_SIMPLE_INIT(light);
+
+				while (sse_count--)
+				{
+					// Current texture index in u,v.
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p0 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p1 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p2 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p3 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					// Lookup pixel from flat texture tile,
+					// re-index using light/colormap.
+					__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
+					SSE_SHADE_SIMPLE(fg);
+					_mm_storeu_si128((__m128i*)dest, fg);
+
+					// Next step in u,v.
+					dest += 4;
+				}
+			}
+			else
+			{
+				SSE_SHADE_INIT(light, shade_constants);
+
+				while (sse_count--)
+				{
+					// Current texture index in u,v.
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p0 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p1 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p2 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+					uint32_t p3 = source[spot];
+					xfrac += xstep;
+					yfrac += ystep;
+
+					// Lookup pixel from flat texture tile,
+					// re-index using light/colormap.
+					__m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]);
+					SSE_SHADE(fg, shade_constants);
+					_mm_storeu_si128((__m128i*)dest, fg);
+
+					// Next step in u,v.
+					dest += 4;
+				}
+			}
+
+			// The do/while below always runs once, so skip it when nothing is left.
+			if (count == 0)
+				return;
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+
+			do
+			{
+				// Current texture index in u,v.
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				// Lookup pixel from flat texture tile
+				*dest++ = shade_pal_index(source[spot], light, shade_constants);
+
+				// Next step in u,v.
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+#endif
+};
+
+// Span drawer that skips texels with palette index 0, leaving the
+// destination pixel untouched there.
+class DrawSpanMaskedRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Was declared as ds_y1, which left the constructor's ds_x2 assignment
+	// as a self-assignment of the global and made Execute() read ::ds_x2.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+
+public:
+	DrawSpanMaskedRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				BYTE texdata;
+
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+				texdata = source[spot];
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				BYTE texdata;
+
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+				texdata = source[spot];
+				if (texdata != 0)
+				{
+					*dest = shade_pal_index(texdata, light, shade_constants);
+				}
+				dest++;
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+// Span drawer that blends every shaded texel with the destination as
+// (fg * srcalpha + bg * destalpha) / 256 per channel. All inputs, including
+// the two blend factors, are copied from the globals at construction.
+class DrawSpanTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	// Was declared as ds_y1, which left the constructor's ds_x2 assignment
+	// as a self-assignment of the global and made Execute() read ::ds_x2.
+	int ds_x2;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+	fixed_t dc_srcalpha;
+	fixed_t dc_destalpha;
+
+public:
+	DrawSpanTranslucentRGBACommand()
+	{
+		ds_source = ::ds_source;
+		ds_light = ::ds_light;
+		ds_shade_constants = ::ds_shade_constants;
+		ds_xfrac = ::ds_xfrac;
+		ds_yfrac = ::ds_yfrac;
+		dc_destorg = ::dc_destorg;
+		ds_x1 = ::ds_x1;
+		ds_x2 = ::ds_x2;
+		ds_y = ::ds_y;
+		ds_xstep = ::ds_xstep;
+		ds_ystep = ::ds_ystep;
+		ds_xbits = ::ds_xbits;
+		ds_ybits = ::ds_ybits;
+		// Capture the blend factors with the rest of the state, so Execute()
+		// does not read whatever the globals hold when it runs.
+		dc_srcalpha = ::dc_srcalpha;
+		dc_destalpha = ::dc_destalpha;
+	}
+
+	void Execute(DrawerThread *thread) override
+	{
+		dsfixed_t xfrac;
+		dsfixed_t yfrac;
+		dsfixed_t xstep;
+		dsfixed_t ystep;
+		uint32_t* dest;
+		const BYTE* source = ds_source;
+		int count;
+		int spot;
+
+		xfrac = ds_xfrac;
+		yfrac = ds_yfrac;
+
+		dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg;
+
+		count = ds_x2 - ds_x1 + 1;
+
+		xstep = ds_xstep;
+		ystep = ds_ystep;
+
+		uint32_t light = calc_light_multiplier(ds_light);
+		ShadeConstants shade_constants = ds_shade_constants;
+
+		// Reduce the fixed-point alphas to 8 fractional bits.
+		uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8);
+		uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8);
+
+		if (ds_xbits == 6 && ds_ybits == 6)
+		{
+			// 64x64 is the most common case by far, so special case it.
+			do
+			{
+				spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+		else
+		{
+			BYTE yshift = 32 - ds_ybits;
+			BYTE xshift = yshift - ds_xbits;
+			int xmask = ((1 << ds_xbits) - 1) << ds_ybits;
+			do
+			{
+				spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+				uint32_t fg = shade_pal_index(source[spot], light, shade_constants);
+				uint32_t fg_red = (fg >> 16) & 0xff;
+				uint32_t fg_green = (fg >> 8) & 0xff;
+				uint32_t fg_blue = (fg) & 0xff;
+
+				uint32_t bg_red = (*dest >> 16) & 0xff;
+				uint32_t bg_green = (*dest >> 8) & 0xff;
+				uint32_t bg_blue = (*dest) & 0xff;
+
+				uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256;
+				uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256;
+				uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256;
+
+				*dest++ = 0xff000000 | (red << 16) | (green << 8) | blue;
+
+				xfrac += xstep;
+				yfrac += ystep;
+			} while (--count);
+		}
+	}
+};
+
+class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand
+{
+	const BYTE *ds_source;
+	fixed_t ds_light;
+	ShadeConstants ds_shade_constants;
+	fixed_t ds_xfrac;
+	fixed_t ds_yfrac;
+	BYTE *dc_destorg;
+	int ds_x1;
+	int ds_y1;
+	int ds_y;
+	fixed_t ds_xstep;
+	fixed_t ds_ystep;
+	int ds_xbits;
+	int ds_ybits;
+
+public:
+
DrawSpanMaskedTranslucentRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + 
int ds_ybits; + +public: + DrawSpanAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; + + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +{ + const BYTE *ds_source; + fixed_t ds_light; + ShadeConstants ds_shade_constants; + fixed_t ds_xfrac; + fixed_t ds_yfrac; + BYTE *dc_destorg; + int ds_x1; + int ds_y1; + int ds_y; + fixed_t ds_xstep; + fixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + +public: + 
DrawSpanMaskedAddClampRGBACommand() + { + ds_source = ::ds_source; + ds_light = ::ds_light; + ds_shade_constants = ::ds_shade_constants; + ds_xfrac = ::ds_xfrac; + ds_yfrac = ::ds_yfrac; + dc_destorg = ::dc_destorg; + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + ds_xstep = ::ds_xstep; + ds_ystep = ::ds_ystep; + ds_xbits = ::ds_xbits; + ds_ybits = ::ds_ybits; + } + + void Execute(DrawerThread *thread) override + { + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const BYTE* source = ds_source; + int count; + int spot; + + uint32_t light = calc_light_multiplier(ds_light); + ShadeConstants shade_constants = ds_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + xfrac = ds_xfrac; + yfrac = ds_yfrac; + + dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + + count = ds_x2 - ds_x1 + 1; + + xstep = ds_xstep; + ystep = ds_ystep; + + if (ds_xbits == 6 && ds_ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do + { + BYTE texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - ds_ybits; + BYTE xshift = yshift - ds_xbits; + int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + do + { + BYTE texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + if (texdata != 0) + { + uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = (fg) & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; + uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; + uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class FillSpanRGBACommand : public DrawerCommand +{ + int ds_x1; + int ds_x2; + int ds_y; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + FillSpanRGBACommand() + { + ds_x1 = ::ds_x1; + ds_x2 = ::ds_x2; + ds_y = ::ds_y; + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + 
ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + int count = (ds_x2 - ds_x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class Vlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int vlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Vlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + vlinebits = ::vlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = vlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Vlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Vlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = 
::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = vlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> 
bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE p0 = bufplce[0][place0 >> bits]; + BYTE p1 = bufplce[1][place1 >> bits]; + BYTE p2 = bufplce[2][place2 >> bits]; + BYTE p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Mvlinec1RGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int mvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + Mvlinec1RGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + mvlinebits = ::mvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = 
(uint32_t*)dc_dest; + int bits = mvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + *dest = shade_pal_index(pix, light, shade_constants); + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Mvlinec4RGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Mvlinec4RGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + DWORD place; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + do + { + BYTE pix; + pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; + pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; + pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; + pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, 
shade_constants); vplce[3] = place + vince[3]; + dest += dc_pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; + BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + BYTE pix0 = bufplce[0][place0 >> bits]; 
+ BYTE pix1 = bufplce[1][place1 >> bits]; + BYTE pix2 = bufplce[2][place2 >> bits]; + BYTE pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + SSE_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += dc_pitch; + } while (--count); + } + } +#endif +}; + +class Tmvline1AddRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = 
shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] 
>> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1AddClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1AddClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4AddClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4AddClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if 
(pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1SubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1SubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, 
shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4SubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4SubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i 
= 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class Tmvline1RevSubClampRGBACommand : public DrawerCommand +{ + fixed_t dc_iscale; + fixed_t dc_texturefrac; + int dc_count; + const BYTE *dc_source; + BYTE *dc_dest; + int tmvlinebits; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + Tmvline1RevSubClampRGBACommand() + { + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_count = ::dc_count; + dc_source = ::dc_source; + dc_dest = ::dc_dest; + tmvlinebits = ::tmvlinebits; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + int count = dc_count; + const BYTE *source = dc_source; + uint32_t *dest = (uint32_t*)dc_dest; + int bits = tmvlinebits; + int pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = 
dc_destalpha >> (FRACBITS - 8); + + do + { + BYTE pix = source[frac >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } + frac += fracstep; + dest += pitch; + } while (--count); + } +}; + +class Tmvline4RevSubClampRGBACommand : public DrawerCommand +{ + BYTE *dc_dest; + int dc_count; + int dc_pitch; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const BYTE *bufplce[4]; + +public: + Tmvline4RevSubClampRGBACommand() + { + dc_dest = ::dc_dest; + dc_count = ::dc_count; + dc_pitch = ::dc_pitch; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = ::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + uint32_t *dest = (uint32_t*)dc_dest; + int count = dc_count; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = 
dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do + { + for (int i = 0; i < 4; ++i) + { + BYTE pix = bufplce[i][vplce[i] >> bits]; + if (pix != 0) + { + uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + vplce[i] += vince[i]; + } + dest += dc_pitch; + } while (--count); + } +}; + +class DrawFogBoundaryLineRGBACommand : public DrawerCommand +{ + int _y; + int _x; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + { + _y = y; + _x = x; + _x2 = x2; + + dc_destorg = ::dc_destorg; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + int y = _y; + int x = _x; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants constants = dc_shade_constants; + + do + { + uint32_t red = (dest[x] >> 16) & 0xff; + uint32_t green = (dest[x] >> 8) & 0xff; + uint32_t blue = dest[x] & 0xff; + + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - 
constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + + dest[x] = 0xff000000 | (red << 16) | (green << 8) | blue; + } while (++x <= x2); + } +}; + +///////////////////////////////////////////////////////////////////////////// + +void R_FinishDrawerCommands() +{ + DrawerCommandQueue::Finish(); +} + +void R_DrawColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillColumnP_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddColumn_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillAddClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillRevSubClampColumn_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFuzzColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); + fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; +} + +void R_DrawAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawTlatedAddColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawShadedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawAddClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampColumnP_RGBA_C() 
+{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedTranslucentP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawSpanMaskedAddClampP_RGBA_C() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_FillSpan_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD vlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void vlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +DWORD mvlinec1_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void mvlinec4_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_add_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_addclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void tmvline4_subclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +fixed_t tmvline1_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; +} + +void 
tmvline4_revsubclamp_RGBA() +{ + DrawerCommandQueue::QueueCommand(); +} + +void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +{ + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } +} + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +{ + // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis + + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + BYTE *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection_RGBA(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection_RGBA(t2, b2, x1); + } +} diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index ca6862ed6..c829c2dc4 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1019,6 +1019,14 @@ void rt_initcols_pal (BYTE *buff) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; } +void rt_span_coverage_pal(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; +} + // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_C (void) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index ff5c0d82f..bbf68a795 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -46,53 +46,1560 @@ #include #endif -uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT*4]; -uint32_t *dc_temp_rgba; - -// Defined in r_draw_t.cpp: extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +///////////////////////////////////////////////////////////////////////////// + +class RtCopy1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = *source; + source += 4; + dest += pitch; + } + if (count & 2) { + dest[0] = source[0]; + dest[pitch] = source[4]; + source += 8; + dest += pitch * 2; + } + if (!(count >>= 2)) + return; + + do { + dest[0] = source[0]; + dest[pitch] = source[4]; + dest[pitch * 2] = source[8]; + dest[pitch * 3] = source[12]; + source += 16; + dest += pitch * 4; + } while (--count); + } +}; + +class RtMap1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + 
} + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + if (count & 1) { + *dest = shade_pal_index(*source, light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +}; + +class RtMap4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + BYTE *dc_destorg; + int dc_pitch; + +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (count & 1) { + dest[0] = shade_pal_index(source[0], light, shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + dest[0] = shade_pal_index(source[0], light, 
shade_constants); + dest[1] = shade_pal_index(source[1], light, shade_constants); + dest[2] = shade_pal_index(source[2], light, shade_constants); + dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[pitch] = shade_pal_index(source[4], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); + source += 8; + dest += pitch * 2; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + 
SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += 4; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = source[4]; + uint32_t p1 = source[5]; + uint32_t p2 = source[6]; + uint32_t p3 = source[7]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += 8; + dest += pitch * 2; + } while (--count); + } + } +#endif +}; + +class RtTranslate1colRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int hx; + int yl; + int yh; + +public: + RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) + { + this->translation = translation; + this->hx = hx; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; + + // Things we do to hit the compiler's optimizer with a clue bat: + // 1. Parallelism is explicitly spelled out by using a separate + // C instruction for each assembly instruction. GCC lets me + // have four temporaries, but VC++ spills to the stack with + // more than two. 
Two is probably optimal, anyway. + // 2. The results of the translation lookups are explicitly + // stored in byte-sized variables. This causes the VC++ code + // to use byte mov instructions in most cases; for apparently + // random reasons, it will use movzx for some places. GCC + // ignores this and uses movzx always. + + // Do 8 rows at a time. + for (int count8 = count >> 3; count8; --count8) + { + int c0, c1; + BYTE b0, b1; + + c0 = source[0]; c1 = source[4]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[4] = b1; + + c0 = source[8]; c1 = source[12]; + b0 = translation[c0]; b1 = translation[c1]; + source[8] = b0; source[12] = b1; + + c0 = source[16]; c1 = source[20]; + b0 = translation[c0]; b1 = translation[c1]; + source[16] = b0; source[20] = b1; + + c0 = source[24]; c1 = source[28]; + b0 = translation[c0]; b1 = translation[c1]; + source[24] = b0; source[28] = b1; + + source += 32; + } + // Finish by doing 1 row at a time. + for (count &= 7; count; --count, source += 4) + { + source[0] = translation[source[0]]; + } + } +}; + +class RtTranslate4colsRGBACommand : public DrawerCommand +{ + const BYTE *translation; + int yl; + int yh; + +public: + RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) + { + this->translation = translation; + this->yl = yl; + this->yh = yh; + } + + void Execute(DrawerThread *thread) override + { + int count = yh - yl + 1; + uint32_t *source = &thread->dc_temp_rgba[yl*4]; + int c0, c1; + BYTE b0, b1; + + // Do 2 rows at a time. 
+ for (int count8 = count >> 1; count8; --count8) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + + c0 = source[4]; c1 = source[5]; + b0 = translation[c0]; b1 = translation[c1]; + source[4] = b0; source[5] = b1; + + c0 = source[6]; c1 = source[7]; + b0 = translation[c0]; b1 = translation[c1]; + source[6] = b0; source[7] = b1; + + source += 8; + } + // Do the final row if count was odd. + if (count & 1) + { + c0 = source[0]; c1 = source[1]; + b0 = translation[c0]; b1 = translation[c1]; + source[0] = b0; source[1] = b1; + + c0 = source[2]; c1 = source[3]; + b0 = translation[c0]; b1 = translation[c1]; + source[2] = b0; source[3] = b1; + } + } +}; + +class RtAdd1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + 
uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtAdd4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t 
green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, 
mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtShaded1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + BYTE *dc_destorg; + int dc_pitch; + int dc_color; + fixed_t dc_light; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_color = ::dc_color; + dc_light = ::dc_light; + } + + void 
Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + uint32_t alpha = colormap[*source]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtShaded4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + lighttable_t *dc_colormap; + int dc_color; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_colormap = ::dc_colormap; + dc_color = ::dc_color; + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 
8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + do { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = colormap[source[i]]; + uint32_t inv_alpha = 64 - alpha; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + colormap = dc_colormap; + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, 
alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } +#endif +}; + +class RtAddClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_shade_constants = ::dc_shade_constants; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + 
+class RtAddClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + +#ifdef NO_SSE + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + source += 4; + dest += pitch; + } while (--count); + } +#else + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + 
(uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = dc_shade_constants; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t 
p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + SSE_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 4; + dest += pitch; + } while (--count); + } + } +#endif +}; + +class RtSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + 
uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], 
light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerCommand +{ + int hx; + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) + { + this->hx = hx; + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4 + hx]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + 
+ uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE *dc_destorg; + int dc_pitch; + fixed_t dc_light; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + ShadeConstants dc_shade_constants; + +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + dc_light = ::dc_light; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; + dc_shade_constants = ::dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + + count = yh - yl; + if (count < 0) + return; + count++; + + dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; + source = &thread->dc_temp_rgba[yl * 4]; + pitch = dc_pitch; + + uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = dc_shade_constants; + + uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + + do { + for (int i = 0; i < 4; i++) + { + uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = 
clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; + } + + source += 4; + dest += pitch; + } while (--count); + } +}; + +class RtInitColsRGBACommand : public DrawerCommand +{ + BYTE *buff; + +public: + RtInitColsRGBACommand(BYTE *buff) + { + this->buff = buff; + } + + void Execute(DrawerThread *thread) override + { + thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; + } +}; + +class DrawColumnHorizRGBACommand : public DrawerCommand +{ + int dc_count; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + const BYTE *dc_source; + int dc_x; + int dc_yl; + int dc_yh; + +public: + DrawColumnHorizRGBACommand() + { + dc_count = ::dc_count; + dc_iscale = ::dc_iscale; + dc_texturefrac = ::dc_texturefrac; + dc_source = ::dc_source; + dc_x = ::dc_x; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + uint32_t *dest; + fixed_t fracstep; + fixed_t frac; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + fracstep = dc_iscale; + frac = dc_texturefrac; + + { + const BYTE *source = dc_source; + + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; + } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + 
+ do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); + } + } +}; + +class FillColumnHorizRGBACommand : public DrawerCommand +{ + int dc_x; + int dc_yl; + int dc_yh; + int dc_count; + int dc_color; + +public: + FillColumnHorizRGBACommand() + { + dc_x = ::dc_x; + dc_count = ::dc_count; + dc_color = ::dc_color; + dc_yl = ::dc_yl; + dc_yh = ::dc_yh; + } + + void Execute(DrawerThread *thread) override + { + int count = dc_count; + int color = dc_color; + uint32_t *dest; + + if (count <= 0) + return; + + { + int x = dc_x & 3; + dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + } + + if (count & 1) { + *dest = color; + dest += 4; + } + if (!(count >>= 1)) + return; + do { + dest[0] = color; dest[4] = color; + dest += 8; + } while (--count); + } +}; + +///////////////////////////////////////////////////////////////////////////// + // Copies one span at hx to the screen at sx. 
void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -108,293 +1615,23 @@ void rt_copy4cols_RGBA_c (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
void rt_map4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; - dest += pitch*2; - } while (--count); -} - -// Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do 
{ - // shade_pal_index 0-3 - { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += 8; - dest += pitch * 2; - } while (--count); - } + DrawerCommandQueue::QueueCommand(sx, yl, yh); } void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. 
- for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } + DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) { - int count = yh - yl + 1; - uint32_t *source = &dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } + DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
@@ -414,195 +1651,15 @@ void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx without clamping. 
-#ifndef NO_SSE -void rt_add4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -620,332 +1677,27 @@ void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Shades all four spans to the screen starting at sx. 
-#ifndef NO_SSE -void rt_shaded4cols_RGBA_SSE(int sx, int yl, int yh) -{ - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); -} -#endif - // Adds one span at hx to the screen at sx with clamping. 
void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. 
void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } -// Adds all four spans to the screen starting at sx with clamping. 
-#ifndef NO_SSE -void rt_addclamp4cols_RGBA_SSE(int sx, int yl, int yh) -{ - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh - yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl * 4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = dc_shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, 
fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += 4; - dest += pitch; - } while (--count); - } -} -#endif - // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { @@ -963,91 +1715,13 @@ void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. 
void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. 
void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1067,91 +1741,13 @@ void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. 
void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4 + hx]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - - count = yh-yl; - if (count < 0) - return; - count++; - - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &dc_temp_rgba[yl*4]; - pitch = dc_pitch; - - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += 4; - dest += pitch; - } while (--count); + DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. @@ -1172,102 +1768,41 @@ void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) // call this function to set up the span pointers. void rt_initcols_rgba (BYTE *buff) { - int y; - - dc_temp_rgba = buff == NULL ? 
dc_temp_rgbabuff_rgba : (uint32_t*)buff; - for (y = 3; y >= 0; y--) + for (int y = 3; y >= 0; y--) horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buff); +} + +void rt_span_coverage_rgba(int x, int start, int stop) +{ + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; } // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. void R_DrawColumnHorizP_RGBA_C (void) { - int count = dc_count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span; - - span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - fracstep = dc_iscale; - frac = dc_texturefrac; - - { - const BYTE *source = dc_source; - - if (count & 1) { - *dest = source[frac>>FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac>>FRACBITS]; frac += fracstep; - dest[4] = source[frac>>FRACBITS]; frac += fracstep; - dest[8] = source[frac>>FRACBITS]; frac += fracstep; - dest[12]= source[frac>>FRACBITS]; frac += fracstep; - dest[16]= source[frac>>FRACBITS]; frac += fracstep; - dest[20]= source[frac>>FRACBITS]; frac += fracstep; - dest[24]= source[frac>>FRACBITS]; frac += fracstep; - dest[28]= source[frac>>FRACBITS]; frac += fracstep; - dest += 32; - } while 
(--count); - } + DrawerCommandQueue::QueueCommand(); } // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { - int count = dc_count; - BYTE color = dc_color; - uint32_t *dest; + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; - if (count <= 0) - return; - - { - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - dest = &dc_temp_rgba[x + 4*dc_yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); + DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_main.cpp b/src/r_main.cpp index a795f8016..348c70120 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -979,6 +979,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); + R_FinishDrawerCommands(); + viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 645741a2a..62190b606 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -165,6 +165,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + R_FinishDrawerCommands(); } //========================================================================== diff --git a/src/r_things.cpp b/src/r_things.cpp index c132cc2fd..b3a2daefe 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -706,10 +706,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop } else { - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = span->Start; - (*tspan)[1] = span->Stop - 1; - *tspan += 2; + rt_span_coverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) From c452d0257380799e6b2d89e6177380ae2a948235 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 7 Jun 2016 15:25:11 +0200 Subject: [PATCH 022/100] Added multicore rendering to true color drawers --- src/r_draw.h | 60 ++++- src/r_draw_rgba.cpp | 535 ++++++++++++++++++++++++++++++------------- src/r_drawt_rgba.cpp | 308 +++++++++++++------------ 3 files changed, 603 insertions(+), 300 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 98be57c51..bf73c9dfb 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,9 @@ #include "r_defs.h" #include +#include +#include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -74,7 +77,6 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; - // [RH] Pointers to the different column and span drawers... // The span blitting interface. 
@@ -443,19 +445,58 @@ void R_SetTranslationMap(lighttable_t *translation); // Wait until all drawers finished executing void R_FinishDrawerCommands(); +class DrawerCommandQueue; + class DrawerThread { public: + std::thread thread; + + // Thread line index of this thread int core = 0; + + // Number of active threads int num_cores = 1; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + return (num_cores - (first_line - core) % num_cores) % num_cores; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } }; class DrawerCommand { +protected: + int dc_dest_y; + public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + virtual void Execute(DrawerThread *thread) = 0; }; @@ -467,8 +508,25 @@ class DrawerCommandQueue std::vector commands; + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + int finished_threads = 0; + + void StartThreads(); + void StopThreads(); + static DrawerCommandQueue *Instance(); + ~DrawerCommandQueue(); + public: // Allocate memory valid for the duration of a command execution static void* AllocMemory(size_t size); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 
9e61bb427..489716e1f 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -63,6 +63,11 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() return &queue; } +DrawerCommandQueue::~DrawerCommandQueue() +{ + StopThreads(); +} + void* DrawerCommandQueue::AllocMemory(size_t size) { // Make sure allocations remain 16-byte aligned @@ -81,19 +86,102 @@ void DrawerCommandQueue::Finish() { auto queue = Instance(); - DrawerThread thread; + // Give worker threads something to do: - size_t size = queue->commands.size(); + std::unique_lock start_lock(queue->start_mutex); + queue->active_commands.swap(queue->commands); + queue->run_id++; + start_lock.unlock(); + + queue->StartThreads(); + queue->start_condition.notify_all(); + + // Do one thread ourselves: + + DrawerThread thread; + thread.core = 0; + thread.num_cores = queue->threads.size() + 1; + + size_t size = queue->active_commands.size(); for (size_t i = 0; i < size; i++) { - auto &command = queue->commands[i]; + auto &command = queue->active_commands[i]; command->Execute(&thread); } - for (auto &command : queue->commands) + // Wait for everyone to finish: + + std::unique_lock end_lock(queue->end_mutex); + queue->end_condition.wait(end_lock, [&]() { return queue->finished_threads == queue->threads.size(); }); + + // Clean up batch: + + for (auto &command : queue->active_commands) command->~DrawerCommand(); - queue->commands.clear(); + queue->active_commands.clear(); queue->memorypool_pos = 0; + queue->finished_threads = 0; +} + +void DrawerCommandQueue::StartThreads() +{ + if (!threads.empty()) + return; + + int num_threads = std::thread::hardware_concurrency(); + if (num_threads == 0) + num_threads = 4; + + threads.resize(num_threads - 1); + + for (int i = 0; i < num_threads - 1; i++) + { + DrawerCommandQueue *queue = this; + DrawerThread *thread = &threads[i]; + thread->core = i + 1; + thread->num_cores = num_threads; + thread->thread = std::thread([=]() + { + int run_id = 0; + while (true) + { + // Wait until 
we are signalled to run: + std::unique_lock start_lock(queue->start_mutex); + queue->start_condition.wait(start_lock, [&]() { return queue->run_id != run_id || queue->shutdown_flag; }); + if (queue->shutdown_flag) + break; + run_id = queue->run_id; + start_lock.unlock(); + + // Do the work: + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } + + // Notify main thread that we finished: + std::unique_lock end_lock(queue->end_mutex); + queue->finished_threads++; + end_lock.unlock(); + queue->end_condition.notify_all(); + } + }); + } +} + +void DrawerCommandQueue::StopThreads() +{ + std::unique_lock lock(start_mutex); + shutdown_flag = true; + lock.unlock(); + start_condition.notify_all(); + for (auto &thread : threads) + thread.thread.join(); + threads.clear(); + lock.lock(); + shutdown_flag = false; } ///////////////////////////////////////////////////////////////////////////// @@ -129,28 +217,28 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; // Inner loop that does the actual texture mapping, // e.g. a DDA-lile scaling. 
@@ -190,17 +278,17 @@ public: int count; uint32_t* dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); uint32_t light = calc_light_multiplier(dc_light); { - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; BYTE color = dc_color; do @@ -235,12 +323,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -286,12 +374,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -337,12 +425,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -388,12 +476,12 @@ public: int count; uint32_t *dest; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; - int pitch = dc_pitch; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 24) & 0xff; @@ -451,15 +539,13 @@ public: if (dc_yh > fuzzviewheight) dc_yh = fuzzviewheight; - count = dc_yh - dc_yl; + count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); // Zero length. - if (count < 0) + if (count <= 0) return; - count++; - - dest = ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg; + dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) // I'm not sure if this is really always the case or not. @@ -467,7 +553,7 @@ public: { // [RH] Make local copies of global vars to try and improve // the optimizations made by the compiler. - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; int fuzz = fuzzpos; int cnt; @@ -573,18 +659,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -649,23 +735,23 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -710,22 +796,22 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -787,15 +873,15 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -805,7 +891,7 @@ public: { const BYTE *source = dc_source; BYTE *colormap = dc_colormap; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; do { @@ -863,18 +949,18 @@ public: fixed_t frac; 
fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -941,19 +1027,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1018,18 +1104,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1096,19 +1182,19 @@ public: fixed_t frac; fixed_t fracstep; - count = 
dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1173,18 +1259,18 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); @@ -1250,19 +1336,19 @@ public: fixed_t frac; fixed_t fracstep; - count = dc_count; + count = thread->count_for_thread(dc_dest_y, dc_count); if (count <= 0) return; - dest = (uint32_t*)dc_dest; + dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = dc_iscale * thread->num_cores; + frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); { BYTE *translation = dc_translation; const BYTE *source = dc_source; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1329,6 +1415,9 @@ 
public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1391,6 +1480,9 @@ public: #else void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1572,6 +1664,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1671,6 +1766,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1789,6 +1887,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -1917,6 +2018,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2035,6 +2139,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + dsfixed_t xfrac; dsfixed_t yfrac; dsfixed_t xstep; @@ -2149,6 +2256,9 @@ public: void Execute(DrawerThread *thread) override { + if (thread->line_skipped_by_thread(ds_y)) + return; + uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; int count = (ds_x2 - ds_x1 + 1); uint32_t light = calc_light_multiplier(ds_light); @@ -2186,13 +2296,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = 
(uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2238,8 +2351,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2250,21 +2367,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { - dest[0] = shade_pal_index(bufplce[0][(place = vplce[0]) >> bits], light0, shade_constants); vplce[0] = place + vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = vplce[1]) >> bits], light1, shade_constants); vplce[1] = place + vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = vplce[2]) >> bits], light2, shade_constants); vplce[2] = place + vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = vplce[3]) >> bits], light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, 
shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; + int pitch = dc_pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); @@ -2276,6 +2406,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2300,7 +2436,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2326,7 +2462,7 @@ public: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2361,13 +2497,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2417,8 +2556,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2429,21 +2572,34 @@ public: ShadeConstants shade_constants = dc_shade_constants; + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { BYTE pix; - pix = bufplce[0][(place = vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); vplce[0] = place + vince[0]; - pix = bufplce[1][(place = vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); vplce[1] = place + vince[1]; - pix = bufplce[2][(place = vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); vplce[2] = place + vince[2]; - pix = bufplce[3][(place = vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); vplce[3] = place + vince[3]; - dest += dc_pitch; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = 
local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; } while (--count); } #else void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2456,6 +2612,12 @@ public: uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } if (shade_constants.simple_shade) { @@ -2483,7 +2645,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } else @@ -2512,7 +2674,7 @@ public: __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += dc_pitch; + dest += pitch; } while (--count); } } @@ -2551,13 +2713,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - int count 
= dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2626,8 +2791,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2641,11 +2810,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2663,9 +2841,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2702,13 +2880,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; 
- int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2777,8 +2958,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2792,11 +2977,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2814,9 +3008,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -2853,13 +3047,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - DWORD frac = 
dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -2928,8 +3125,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2943,11 +3144,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -2965,9 +3175,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3004,13 +3214,16 @@ public: void Execute(DrawerThread *thread) override { - DWORD fracstep = dc_iscale; - 
DWORD frac = dc_texturefrac; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + DWORD fracstep = dc_iscale * thread->num_cores; + DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); const BYTE *source = dc_source; - uint32_t *dest = (uint32_t*)dc_dest; + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; - int pitch = dc_pitch; + int pitch = dc_pitch * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -3079,8 +3292,12 @@ public: void Execute(DrawerThread *thread) override { - uint32_t *dest = (uint32_t*)dc_dest; - int count = dc_count; + int count = thread->count_for_thread(dc_dest_y, dc_count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + int pitch = dc_pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3094,11 +3311,20 @@ public: uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(dc_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + do { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][vplce[i] >> bits]; + BYTE pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { uint32_t fg = shade_pal_index(pix, light[i], shade_constants); @@ -3116,9 +3342,9 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - vplce[i] += vince[i]; + local_vplce[i] += local_vince[i]; } - dest += dc_pitch; + dest += pitch; } while (--count); } }; @@ -3146,6 +3372,9 @@ public: void Execute(DrawerThread *thread) override { + if 
(thread->line_skipped_by_thread(_y)) + return; + int y = _y; int x = _x; int x2 = _x2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index bbf68a795..c2caec0c2 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -78,26 +78,26 @@ public: uint32_t *source; uint32_t *dest; int count; - int pitch; + int pitch, sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, (yh - yl + 1)); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = *source; - source += 4; + source += sincr; dest += pitch; } if (count & 2) { dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; + dest[pitch] = source[sincr]; + source += sincr * 2; dest += pitch * 2; } if (!(count >>= 2)) @@ -105,10 +105,10 @@ public: do { dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch * 2] = source[8]; - dest[pitch * 3] = source[12]; - source += 16; + dest[pitch] = source[sincr]; + dest[pitch * 2] = source[sincr * 2]; + dest[pitch * 3] = source[sincr * 3]; + source += sincr * 4; dest += pitch * 4; } while (--count); } @@ -145,22 +145,23 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + 
thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { *dest = shade_pal_index(*source, light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -168,8 +169,8 @@ public: do { dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -205,25 +206,26 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (count & 1) { dest[0] = shade_pal_index(source[0], light, shade_constants); dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -234,11 +236,11 @@ public: dest[1] = shade_pal_index(source[1], light, shade_constants); dest[2] = shade_pal_index(source[2], light, shade_constants); dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[4], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[5], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[6], light, shade_constants); - 
dest[pitch + 3] = shade_pal_index(source[7], light, shade_constants); - source += 8; + dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[pitch + 1] = shade_pal_index(source[sincr + 1], light, shade_constants); + dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); + dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -249,19 +251,20 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; ShadeConstants shade_constants = dc_shade_constants; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = thread->num_cores * 4; if (shade_constants.simple_shade) { @@ -278,7 +281,7 @@ public: SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -299,17 +302,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -328,7 +331,7 @@ public: SSE_SHADE(fg, shade_constants); 
_mm_storeu_si128((__m128i*)dest, fg); - source += 4; + source += sincr; dest += pitch; } if (!(count >>= 1)) @@ -349,17 +352,17 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[4]; - uint32_t p1 = source[5]; - uint32_t p2 = source[6]; - uint32_t p3 = source[7]; + uint32_t p0 = source[sincr]; + uint32_t p1 = source[sincr + 1]; + uint32_t p2 = source[sincr + 2]; + uint32_t p3 = source[sincr + 3]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)(dest + pitch), fg); } - source += 8; + source += sincr * 2; dest += pitch * 2; } while (--count); } @@ -522,15 +525,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -554,7 +558,7 @@ public: *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -590,15 +594,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * 
thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -625,7 +630,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -636,15 +641,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -686,7 +692,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -722,7 +728,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -764,16 +770,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = 
shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -793,7 +800,7 @@ public: uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -832,16 +839,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); uint32_t fg_red = (fg >> 16) & 0xff; @@ -864,7 +872,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -876,16 +884,17 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; colormap = dc_colormap; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); @@ -913,7 +922,7 @@ public: __m128i color = 
_mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -955,15 +964,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -986,7 +996,7 @@ public: uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1026,15 +1036,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1060,7 +1071,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1071,15 +1082,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 
0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; @@ -1121,7 +1133,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1157,7 +1169,7 @@ public: __m128i color = _mm_packus_epi16(color_lo, color_hi); _mm_storeu_si128((__m128i*)dest, color); - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1200,15 +1212,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1231,7 +1244,7 @@ public: uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1270,15 +1283,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + 
count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1305,7 +1319,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1346,15 +1360,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4 + hx]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1377,7 +1392,7 @@ public: uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += 4; + source += sincr; dest += pitch; } while (--count); } @@ -1416,15 +1431,16 @@ public: uint32_t *dest; int count; int pitch; + int sincr; - count = yh - yl; - if (count < 0) + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) return; - count++; - dest = ylookup[yl] + sx + (uint32_t*)dc_destorg; - source = &thread->dc_temp_rgba[yl * 4]; - pitch = dc_pitch; + dest = thread->dest_for_thread(yl, 
dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = dc_pitch * thread->num_cores; + sincr = 4 * thread->num_cores; uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; @@ -1451,7 +1467,7 @@ public: dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; } - source += 4; + source += sincr; dest += pitch; } while (--count); } From c59db95cc87543e648f64a19d37f78f2a6656d4d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 9 Jun 2016 23:12:38 +0200 Subject: [PATCH 023/100] Rewrote wallscan to fix buffer overruns and code duplication. --- src/r_segs.cpp | 910 +++++++++++++++---------------------------------- 1 file changed, 283 insertions(+), 627 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index bd2c7d22b..5aa7c29a2 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1065,53 +1065,149 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// prevlineasm1 is like vlineasm1 but skips the loop if only drawing one pixel -inline fixed_t prevline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) { - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return doprevline1 (); + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + } + return uv_start + uv_step * (uint32_t)count; + } + else + { + uint32_t uv_pos = uv_start; + + int left = y2 - y1; + while (left > 0) + { + int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; + int count = MIN(left, next_uv_wrap); + + if (count > 0) + { + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + } + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } } -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) { - int x, fracbits; - int y1ve[4], y2ve[4], u4, d4, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - // This function also gets used to draw skies. Unlike BUILD, skies are - // drawn by visplane instead of by bunch, so these checks are invalid. - //if ((uwal[x1] > viewheight) && (uwal[x2] > viewheight)) return; - //if ((dwal[x1] < 0) && (dwal[x2] < 0)) return; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + uv_pos[i] += uv_step[i] * (y2 - y1); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = y2 - y1; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setupvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; + int left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + int count = left; + for (int i = 0; i < 4; i++) + { + int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + count = MIN(next_uv_wrap, count); + } - x = x1; - //while ((umost[x] > dmost[x]) && (x < x2)) x++; + // Draw until that column wraps + if (count > 0) + { + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + } + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - std::floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1131,139 +1227,190 @@ void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *l else R_SetColorMapLight(basecolormap, 0, 0); - for(; (x < x2) && (x & 3); ++x) + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - assert (y1ve[0] < viewheight); - assert (y2ve[0] <= viewheight); + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; if (!fixed) - { // calculate lighting R_SetColorMapLight(basecolormap, light, wallshade); - } - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x)*pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = 
xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } - for(; x < x2-3; x += 4) + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) { - bad = 0; - for (z = 3; z>= 0; --z) - { - y1ve[z] = uwal[x+z];//max(uwal[x+z],umost[x+z]); - y2ve[z] = dwal[x+z];//min(dwal[x+z],dmost[x+z])-1; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[x + z] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) { - light += rw_lightstep * 4; + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], 
uv_max, source[i], draw1column); + } continue; } + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active if (!fixed) { - for (z = 0; z < 4; ++z) + for (int i = 0; i < 4; i++) { - light += rw_lightstep; if (r_swtruecolor) { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = LIGHTSCALE(lights[i], wallshade); } else { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; } } } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - prevline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - bad >>= 1; - } - continue; - } + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = prevline1(vince[z],palookupoffse[z], palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+x+z)*pixelsize+dc_destorg); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = (ylookup[u4]+x)*pixelsize+dc_destorg; - dovline4(); - } - - BYTE *i = (x+ylookup[d4])*pixelsize+dc_destorg; - for (z = 0; z < 4; ++z) - { - if 
(y2ve[z] > d4) - { - prevline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); } } - for(;x> FRACBITS); - dc_dest = (ylookup[y1ve[0]] + x) * pixelsize + dc_destorg; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dovline1(); + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); } -//unclock (WallScanCycles); - NetUpdate (); } +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1331,507 +1478,16 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -//============================================================================= -// -// wallscan_np2 -// -// This is a wrapper around wallscan that helps it tile textures whose heights -// are not powers of 2. It divides the wall into texture-sized strips and calls -// wallscan for each of those. Since only one repetition of the texture fits -// in each strip, wallscan will not tile. -// -//============================================================================= - +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. 
void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - if (!r_np2) - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); - } - else - { - short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; - short *up, *down; - double texheight = rw_pic->GetHeight(); - double partition; - double scaledtexheight = texheight / yrepeat; - - if (yrepeat >= 0) - { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - if (partition == top) - { - partition -= scaledtexheight; - } - up = uwal; - down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition > bot) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 3) - { - for (int j = x1; j < x2; ++j) - { - down[j] = clamp(most3[j], up[j], dwal[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - up = down; - down = (down == most1) ? most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); - } - else - { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); - up = most1; - down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; - while (partition < top) - { - int j = OWallMost(most3, partition - ViewPos.Z, &WallC); - if (j != 12) - { - for (int j = x1; j < x2; ++j) - { - up[j] = clamp(most3[j], uwal[j], down[j]); - } - call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); - down = up; - up = (up == most1) ? 
most2 : most1; - } - partition -= scaledtexheight; - dc_texturemid -= texheight; - } - call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); - } - } + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); } +// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) - { - double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); - double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); - double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); - double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); - double top = MAX(frontcz1, frontcz2); - double bot = MIN(frontfz1, frontfz2); - if (fake3D & FAKE3D_CLIPTOP) - { - top = MIN(top, sclipTop); - } - if (fake3D & FAKE3D_CLIPBOTTOM) - { - bot = MAX(bot, sclipBottom); - } - wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); - } - else - { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); - } -} - -inline fixed_t mvline1 (fixed_t vince, BYTE *colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; - return domvline1 (); -} - -void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - 
- if (!rw_pic->bMasked) - { // Textures that aren't masked can use the faster wallscan. - wallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32- rw_pic->HeightBits; - setupmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + 
iscale * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - palookuplight[z] = 0; - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - mvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - vplce[z] = mvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - domvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - mvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]]*pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - domvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); -} - -inline void preptmvline1 (fixed_t vince, BYTE 
*colormap, fixed_t light, int count, fixed_t vplce, const BYTE *bufplce, BYTE *dest) -{ - dc_iscale = vince; - dc_colormap = colormap; - dc_light = light; - dc_count = count; - dc_texturefrac = vplce; - dc_source = bufplce; - dc_dest = dest; -} - -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, - double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - fixed_t (*tmvline1)(); - void (*tmvline4)(); - int x, fracbits; - BYTE *pixel; - int pixelsize, pixelshift; - int y1ve[4], y2ve[4], u4, d4, startx, dax, z; - char bad; - float light = rw_light - rw_lightstep; - SDWORD xoffset; - BYTE *basecolormapdata; - double iscale; - - if (rw_pic->UseType == FTexture::TEX_Null) - { - return; - } - - if (!R_GetTransMaskDrawers (&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan (x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - return; - } - -//extern cycle_t WallScanCycles; -//clock (WallScanCycles); - - pixelsize = r_swtruecolor ? 4 : 1; - pixelshift = r_swtruecolor ? 
2 : 0; - - rw_pic->GetHeight(); // Make sure texture size is loaded - fracbits = 32 - rw_pic->HeightBits; - setuptmvline(fracbits); - xoffset = rw_offset; - basecolormapdata = basecolormap->Maps; - fixed_t centeryfrac = FLOAT2FIXED(CenterY); - - x = startx = x1; - pixel = x * pixelsize + dc_destorg; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - for(; (x < x2) && (((size_t)pixel >> pixelshift) & 3); ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x];//max(uwal[x],umost[x]); - y2ve[0] = dwal[x];//min(dwal[x],dmost[x]); - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - - for(; x < x2-3; x += 4, pixel += 4 * pixelsize) - { - bad = 0; - for (z = 3, dax = x+3; z >= 0; --z, --dax) - { - y1ve[z] = uwal[dax]; - y2ve[z] = dwal[dax]; - if (y2ve[z] <= y1ve[z]) { bad += 1<> FRACBITS); - iscale = swal[dax] * yrepeat; - vince[z] = xs_ToFixed(fracbits, iscale); - vplce[z] = xs_ToFixed(fracbits, dc_texturemid + vince[z] * (y1ve[z] - CenterY + 0.5)); - } - if (bad == 15) - { - light += rw_lightstep * 4; - continue; - } - - if (!fixed) - { - for (z = 0; z < 4; ++z) - { - light += rw_lightstep; - if (r_swtruecolor) - { - palookupoffse[z] = basecolormapdata; - 
palookuplight[z] = LIGHTSCALE(light, wallshade); - } - else - { - palookupoffse[z] = basecolormapdata + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - } - } - - u4 = MAX(MAX(y1ve[0],y1ve[1]),MAX(y1ve[2],y1ve[3])); - d4 = MIN(MIN(y2ve[0],y2ve[1]),MIN(y2ve[2],y2ve[3])); - - if ((bad != 0) || (u4 >= d4)) - { - for (z = 0; z < 4; ++z) - { - if (!(bad & 1)) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],y2ve[z]-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - tmvline1(); - } - bad >>= 1; - } - continue; - } - - for (z = 0; z < 4; ++z) - { - if (u4 > y1ve[z]) - { - preptmvline1(vince[z],palookupoffse[z],palookuplight[z],u4-y1ve[z],vplce[z],bufplce[z],(ylookup[y1ve[z]]+z)*pixelsize+pixel); - vplce[z] = tmvline1(); - } - } - - if (d4 > u4) - { - dc_count = d4-u4; - dc_dest = ylookup[u4]*pixelsize+pixel; - tmvline4(); - } - - BYTE *i = pixel+ylookup[d4]*pixelsize; - for (z = 0; z < 4; ++z) - { - if (y2ve[z] > d4) - { - preptmvline1(vince[z],palookupoffse[0],palookuplight[0],y2ve[z]-d4,vplce[z],bufplce[z],i+z*pixelsize); - tmvline1(); - } - } - } - for(; x < x2; ++x, pixel += pixelsize) - { - light += rw_lightstep; - y1ve[0] = uwal[x]; - y2ve[0] = dwal[x]; - if (y2ve[0] <= y1ve[0]) continue; - - if (!fixed) - { // calculate lighting - R_SetColorMapLight(basecolormap, light, wallshade); - } - - dc_source = getcol (rw_pic, (lwal[x] + xoffset) >> FRACBITS); - dc_dest = ylookup[y1ve[0]] * pixelsize + pixel; - dc_count = y2ve[0] - y1ve[0]; - iscale = swal[x] * yrepeat; - dc_iscale = xs_ToFixed(fracbits, iscale); - dc_texturefrac = xs_ToFixed(fracbits, dc_texturemid + iscale * (y1ve[0] - CenterY + 0.5)); - - tmvline1(); - } - -//unclock(WallScanCycles); - - NetUpdate (); + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); } // From 05b6fe6174147ceef8c64ad83a1eab2736080c3e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 13:50:34 +0200 Subject: [PATCH 024/100] Added true color texture support for walls and 
floors --- src/r_draw.cpp | 7 +- src/r_draw.h | 22 ++- src/r_draw_rgba.cpp | 303 +++++++++++++++++++----------------- src/r_main.h | 46 ++++++ src/r_plane.cpp | 104 +++++++++---- src/r_segs.cpp | 4 + src/textures/pngtexture.cpp | 162 ++++++++++++++++++- src/textures/texture.cpp | 28 ++++ src/textures/textures.h | 12 +- 9 files changed, 508 insertions(+), 180 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 984a74f3f..2710b9992 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2265,9 +2265,12 @@ const BYTE *R_GetColumn (FTexture *tex, int col) { col = width + (col % width); } - return tex->GetColumn (col, NULL); -} + if (r_swtruecolor) + return (const BYTE *)tex->GetColumnBgra(col, NULL); + else + return tex->GetColumn(col, NULL); +} // [RH] Initialize the column drawer pointers void R_InitColumnDrawers () diff --git a/src/r_draw.h b/src/r_draw.h index bf73c9dfb..3f97a7a65 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -520,6 +520,9 @@ class DrawerCommandQueue std::condition_variable end_condition; int finished_threads = 0; + bool no_threading = false; + DrawerThread single_core_thread; + void StartThreads(); void StopThreads(); @@ -535,11 +538,20 @@ public: template static void QueueCommand(Types &&... 
args) { - void *ptr = AllocMemory(sizeof(T)); - T *command = new (ptr)T(std::forward(args)...); - if (!command) - return; - Instance()->commands.push_back(command); + auto queue = Instance(); + if (queue->no_threading) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } } // Wait until all worker threads finished executing commands diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 489716e1f..528c3c986 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -85,6 +85,8 @@ void* DrawerCommandQueue::AllocMemory(size_t size) void DrawerCommandQueue::Finish() { auto queue = Instance(); + if (queue->commands.empty()) + return; // Give worker threads something to do: @@ -190,8 +192,8 @@ class DrawColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_texturefrac; - fixed_t dc_iscale; + DWORD dc_texturefrac; + DWORD dc_iscale; fixed_t dc_light; const BYTE *dc_source; int dc_pitch; @@ -628,8 +630,8 @@ class DrawAddColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -708,8 +710,8 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -769,8 +771,8 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -845,8 +847,8 @@ 
class DrawShadedColumnRGBACommand : public DrawerCommand private: int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; fixed_t dc_light; const BYTE *dc_source; lighttable_t *dc_colormap; @@ -918,8 +920,8 @@ class DrawAddClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -994,8 +996,8 @@ class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; BYTE *dc_translation; const BYTE *dc_source; int dc_pitch; @@ -1073,8 +1075,8 @@ class DrawSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1149,8 +1151,8 @@ class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1228,8 +1230,8 @@ class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1303,8 +1305,8 @@ class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int dc_count; BYTE *dc_dest; - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; const BYTE *dc_source; int dc_pitch; fixed_t dc_light; @@ -1380,7 +1382,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_xfrac; fixed_t 
ds_yfrac; fixed_t ds_xstep; @@ -1397,7 +1399,7 @@ class DrawSpanRGBACommand : public DrawerCommand public: DrawSpanRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_xfrac = ::ds_xfrac; ds_yfrac = ::ds_yfrac; ds_xstep = ::ds_xstep; @@ -1423,7 +1425,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1450,7 +1452,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1469,7 +1471,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1488,7 +1490,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1598,7 +1600,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -1617,7 +1619,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_pal_index(source[spot], light, shade_constants); + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -1630,7 +1632,7 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1647,7 +1649,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand public: DrawSpanMaskedRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1672,7 +1674,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1694,13 +1696,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1714,13 +1716,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - *dest = shade_pal_index(texdata, light, shade_constants); + *dest = shade_bgra(texdata, light, shade_constants); } dest++; xfrac += xstep; @@ -1732,7 +1734,7 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1749,7 +1751,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand public: DrawSpanTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t *)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1774,7 +1776,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* 
dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1801,7 +1803,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1829,7 +1831,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1853,7 +1855,7 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -1870,7 +1872,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -1895,7 +1897,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -1920,13 +1922,13 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1953,13 +1955,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -1984,7 +1986,7 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2001,7 +2003,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand public: DrawSpanAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2026,7 +2028,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2053,7 +2055,7 @@ public: { spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t fg = shade_pal_index(source[spot], light, shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2081,7 +2083,7 @@ public: { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t fg = shade_pal_index(source[spot], light, 
shade_constants); + uint32_t fg = shade_bgra(source[spot], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2105,7 +2107,7 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const BYTE *ds_source; + const uint32_t *ds_source; fixed_t ds_light; ShadeConstants ds_shade_constants; fixed_t ds_xfrac; @@ -2122,7 +2124,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = ::ds_source; + ds_source = (const uint32_t*)::ds_source; ds_light = ::ds_light; ds_shade_constants = ::ds_shade_constants; ds_xfrac = ::ds_xfrac; @@ -2147,7 +2149,7 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const BYTE* source = ds_source; + const uint32_t* source = ds_source; int count; int spot; @@ -2172,13 +2174,13 @@ public: // 64x64 is the most common case by far, so special case it. do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2205,13 +2207,13 @@ public: int xmask = ((1 << ds_xbits) - 1) << ds_ybits; do { - BYTE texdata; + uint32_t texdata; spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); texdata = source[spot]; if (texdata != 0) { - uint32_t fg = shade_pal_index(texdata, light, shade_constants); + uint32_t fg = shade_bgra(texdata, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = (fg) & 0xff; @@ -2270,8 +2272,8 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const 
BYTE *dc_source; BYTE *dc_dest; @@ -2302,7 +2304,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = vlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2312,7 +2314,7 @@ public: do { - *dest = shade_pal_index(source[frac >> bits], light, shade_constants); + *dest = shade_bgra(source[frac >> bits], light, shade_constants); frac += fracstep; dest += pitch; } while (--count); @@ -2329,7 +2331,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Vlinec4RGBACommand() @@ -2344,7 +2346,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2378,10 +2380,10 @@ public: do { - dest[0] = shade_pal_index(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_pal_index(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_pal_index(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_pal_index(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], 
light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2403,7 +2405,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2423,17 +2424,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2449,17 +2450,17 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE p0 = bufplce[0][place0 >> bits]; - BYTE p1 = bufplce[1][place1 >> bits]; - BYTE p2 = bufplce[2][place2 >> bits]; - BYTE p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = 
_mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += pitch; @@ -2471,8 +2472,8 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2503,7 +2504,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = mvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2513,10 +2514,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - *dest = shade_pal_index(pix, light, shade_constants); + *dest = shade_bgra(pix, light, shade_constants); } frac += fracstep; dest += pitch; @@ -2534,7 +2535,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Mvlinec4RGBACommand() @@ -2549,7 +2550,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2583,11 +2584,11 @@ public: do { - BYTE pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_pal_index(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_pal_index(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_pal_index(pix, light2, shade_constants); local_vplce[2] = 
place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_pal_index(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2609,7 +2610,6 @@ public: ShadeConstants shade_constants = dc_shade_constants; - uint32_t *palette = (uint32_t*)GPalette.BaseColors; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; int skipped = thread->skipped_by_thread(dc_dest_y); @@ -2629,10 +2629,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2642,7 +2642,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], 
palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE_SIMPLE(fg); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2658,10 +2658,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - BYTE pix0 = bufplce[0][place0 >> bits]; - BYTE pix1 = bufplce[1][place1 >> bits]; - BYTE pix2 = bufplce[2][place2 >> bits]; - BYTE pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; // movemask = !(pix == 0) __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); @@ -2671,7 +2671,7 @@ public: local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(palette[pix3], palette[pix2], palette[pix1], palette[pix0]); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); SSE_SHADE(fg, shade_constants); _mm_maskmoveu_si128(fg, movemask, (char*)dest); dest += pitch; @@ -2683,8 +2683,8 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2719,7 +2719,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2732,10 +2732,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = 
shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2768,7 +2768,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddRGBACommand() @@ -2785,7 +2785,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2823,10 +2823,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2850,8 +2850,8 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -2886,7 +2886,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -2899,10 +2899,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -2935,7 +2935,7 @@ class 
Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -2952,7 +2952,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -2990,10 +2990,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3017,8 +3017,8 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3053,7 +3053,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3066,10 +3066,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3102,7 +3102,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: 
Tmvline4SubClampRGBACommand() @@ -3119,7 +3119,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3157,10 +3157,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3184,8 +3184,8 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - fixed_t dc_iscale; - fixed_t dc_texturefrac; + DWORD dc_iscale; + DWORD dc_texturefrac; int dc_count; const BYTE *dc_source; BYTE *dc_dest; @@ -3220,7 +3220,7 @@ public: DWORD fracstep = dc_iscale * thread->num_cores; DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const BYTE *source = dc_source; + const uint32 *source = (const uint32 *)dc_source; uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); int bits = tmvlinebits; int pitch = dc_pitch * thread->num_cores; @@ -3233,10 +3233,10 @@ public: do { - BYTE pix = source[frac >> bits]; + uint32_t pix = source[frac >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light, shade_constants); + uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3269,7 +3269,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const BYTE *bufplce[4]; + const uint32 *bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3286,7 +3286,7 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = ::bufplce[i]; + 
bufplce[i] = (const uint32 *)::bufplce[i]; } } @@ -3324,10 +3324,10 @@ public: { for (int i = 0; i < 4; ++i) { - BYTE pix = bufplce[i][local_vplce[i] >> bits]; + uint32_t pix = bufplce[i][local_vplce[i] >> bits]; if (pix != 0) { - uint32_t fg = shade_pal_index(pix, light[i], shade_constants); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -3549,8 +3549,17 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } +extern FTexture *rw_pic; // For the asserts below + DWORD vlinec1_RGBA() { + DWORD fracstep = dc_iscale; + DWORD frac = dc_texturefrac; + DWORD height = rw_pic->GetHeight(); + assert((frac >> vlinebits) < height); + frac += dc_count * fracstep; + assert((frac >> vlinebits) <= height); + DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } @@ -3558,6 +3567,8 @@ DWORD vlinec1_RGBA() void vlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_RGBA() @@ -3569,6 +3580,8 @@ DWORD mvlinec1_RGBA() void mvlinec4_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_RGBA() @@ -3580,6 +3593,8 @@ fixed_t tmvline1_add_RGBA() void tmvline4_add_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_RGBA() @@ -3591,6 +3606,8 @@ fixed_t tmvline1_addclamp_RGBA() void tmvline4_addclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_RGBA() @@ -3602,6 +3619,8 @@ fixed_t tmvline1_subclamp_RGBA() void tmvline4_subclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_RGBA() @@ -3613,6 +3632,8 @@ fixed_t 
tmvline1_revsubclamp_RGBA() void tmvline4_revsubclamp_RGBA() { DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; } void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) diff --git a/src/r_main.h b/src/r_main.h index 5d4ff1174..6d0e2a21f 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -136,6 +136,19 @@ FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { @@ -171,6 +184,39 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * 
light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 26d579d6d..05fce79a6 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -973,15 +973,22 @@ extern FTexture *rw_pic; // Allow for layer skies up to 512 pixels tall. This is overkill, // since the most anyone can ever see of the sky is 500 pixels. // We need 4 skybufs because wallscan can draw up to 4 columns at a time. +// Need two versions - one for true color and one for palette static BYTE skybuf[4][512]; +static uint32_t skybuf_bgra[4][512]; static DWORD lastskycol[4]; +static DWORD lastskycol_bgra[4]; static int skycolplace; +static int skycolplace_bgra; // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + if (!r_swtruecolor) + return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + else + return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); } // Get a column of sky when there are two overlapping sky textures @@ -996,38 +1003,77 @@ static const BYTE *R_GetTwoSkyColumns (FTexture *fronttex, int x) DWORD skycol = (angle1 << 16) | angle2; int i; - for (i = 0; i < 4; ++i) + if (!r_swtruecolor) { - if (lastskycol[i] == skycol) + for (i = 0; i < 4; ++i) { - return skybuf[i]; + if (lastskycol[i] == skycol) + { + return skybuf[i]; + } } + + lastskycol[skycolplace] = skycol; + BYTE *composite = skybuf[skycolplace]; + skycolplace = (skycolplace + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const BYTE *front = fronttex->GetColumn(angle1, NULL); + const BYTE *back = backskytex->GetColumn(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do + { + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return composite; } - - lastskycol[skycolplace] = skycol; - BYTE *composite = skybuf[skycolplace]; - skycolplace = (skycolplace + 1) & 3; - - // The ordering of the following code has been tuned to allow VC++ to optimize - // it well. In particular, this arrangement lets it keep count in a register - // instead of on the stack. 
- const BYTE *front = fronttex->GetColumn (angle1, NULL); - const BYTE *back = backskytex->GetColumn (angle2, NULL); - - int count = MIN (512, MIN (backskytex->GetHeight(), fronttex->GetHeight())); - i = 0; - do + else { - if (front[i]) + return R_GetOneSkyColumn(fronttex, x); + for (i = 0; i < 4; ++i) { - composite[i] = front[i]; + if (lastskycol_bgra[i] == skycol) + { + return (BYTE*)(skybuf_bgra[i]); + } } - else + + lastskycol_bgra[skycolplace_bgra] = skycol; + uint32_t *composite = skybuf_bgra[skycolplace_bgra]; + skycolplace_bgra = (skycolplace_bgra + 1) & 3; + + // The ordering of the following code has been tuned to allow VC++ to optimize + // it well. In particular, this arrangement lets it keep count in a register + // instead of on the stack. + const uint32_t *front = (const uint32_t *)fronttex->GetColumnBgra(angle1, NULL); + const uint32_t *back = (const uint32_t *)backskytex->GetColumnBgra(angle2, NULL); + + int count = MIN(512, MIN(backskytex->GetHeight(), fronttex->GetHeight())); + i = 0; + do { - composite[i] = back[i]; - } - } while (++i, --count); - return composite; + if (front[i]) + { + composite[i] = front[i]; + } + else + { + composite[i] = back[i]; + } + } while (++i, --count); + return (BYTE*)composite; + } } static void R_DrawSky (visplane_t *pl) @@ -1062,6 +1108,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } rw_pic = frontskytex; @@ -1075,6 +1122,7 @@ static void R_DrawSky (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, frontyScale, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1112,6 +1160,7 @@ static void R_DrawSkyStriped (visplane_t *pl) for (x = 0; x < 4; ++x) { lastskycol[x] = 0xffffffff; + lastskycol_bgra[x] = 0xffffffff; } wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); @@ -1230,7 +1279,10 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - ds_source = tex->GetPixels (); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5aa7c29a2..87ce48ec4 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1092,6 +1092,8 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; int count = MIN(left, next_uv_wrap); + if (count <= 0) + break; // This should never happen, but it does.. if (count > 0) { @@ -1146,6 +1148,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; count = MIN(next_uv_wrap, count); } + if (count <= 0) + break; // This should never happen, but it does.. 
// Draw until that column wraps if (count > 0) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index e47fa62c0..95f7aca75 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,6 +41,7 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" +#include //========================================================================== // @@ -56,6 +57,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -65,6 +67,7 @@ protected: FString SourceFile; BYTE *Pixels; + std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -73,11 +76,13 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; + std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -266,6 +271,12 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } + for (i = 0; i < PaletteSize; i++) + { + PngPalette.push_back(p.pngpal[i][0]); + PngPalette.push_back(p.pngpal[i][1]); + PngPalette.push_back(p.pngpal[i][2]); + } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); @@ -369,11 +380,9 @@ FPNGTexture::~FPNGTexture () void FPNGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -446,6 +455,16 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } +const uint32_t *FPNGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + + 
//========================================================================== // // @@ -602,6 +621,139 @@ void FPNGTexture::MakeTexture () delete lump; } +void FPNGTexture::MakeTextureBgra () +{ + FileReader *lump; + + if (SourceLump >= 0) + { + lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); + } + else + { + lump = new FileReader(SourceFile.GetChars()); + } + + PixelsBgra.resize(Width * Height, 0xffff0000); + if (StartOfIDAT != 0) + { + DWORD len, id; + lump->Seek (StartOfIDAT, SEEK_SET); + lump->Read(&len, 4); + lump->Read(&id, 4); + + if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ + { + std::vector src(Width*Height); + M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + + if (!PngPalette.empty()) + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; + uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; + uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; + PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; + } + } + } + else + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t gray = src[x + y * Width]; + PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; + } + } + } + } + else /* RGB and/or Alpha present */ + { + int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; + BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; + BYTE *in; + uint32_t *out; + int x, y, pitch, backstep; + + M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); + in = tempix; + out = PixelsBgra.data(); + + // Convert from source format to paletted, column-major. + // Formats with alpha maps are reduced to only 1 bit of alpha. 
+ switch (ColorType) + { + case 2: // RGB + pitch = Width * 3; + backstep = Height * pitch - 3; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + if (!HaveTrans) + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + else + { + if (in[0] == NonPaletteTrans[0] && + in[1] == NonPaletteTrans[1] && + in[2] == NonPaletteTrans[2]) + { + *out++ = 0; + } + else + { + *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + } + } + in += pitch; + } + in -= backstep; + } + break; + + case 4: // Grayscale + Alpha + pitch = Width * 2; + backstep = Height * pitch - 2; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + uint32_t alpha = in[1]; + uint32_t gray = in[0]; + *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; + in += pitch; + } + in -= backstep; + } + break; + + case 6: // RGB + Alpha + pitch = Width * 4; + backstep = Height * pitch - 4; + for (x = Width; x > 0; --x) + { + for (y = Height; y > 0; --y) + { + *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + in += pitch; + } + in -= backstep; + } + break; + } + delete[] tempix; + } + } + delete lump; +} + //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7b90c295f..1869491b1 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); typedef FTexture * (*CreateFunc)(FileReader & file, int lumpnum); @@ -175,6 +176,33 @@ FTexture::~FTexture () KillNative(); } +const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) +{ + const uint32_t *pixels = GetPixelsBgra(); + + column %= Width; + if 
(column < 0) + column += Width; + + if (spans_out != nullptr) + GetColumn(column, spans_out); + return pixels + column * Height; +} + +const uint32_t *FTexture::GetPixelsBgra() +{ + if (BgraPixels.empty()) + { + const BYTE *indices = GetPixels(); + BgraPixels.resize(Width * Height); + for (int i = 0; i < Width * Height; i++) + { + BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + } + } + return BgraPixels.data(); +} + bool FTexture::CheckModified () { return false; diff --git a/src/textures/textures.h b/src/textures/textures.h index 14667093c..0d066eff5 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -3,6 +3,7 @@ #include "doomtype.h" #include "vectors.h" +#include class FBitmap; struct FRemapTable; @@ -175,9 +176,15 @@ public: // Returns a single column of the texture virtual const BYTE *GetColumn (unsigned int column, const Span **spans_out) = 0; + // Returns a single column of the texture, in BGRA8 format + virtual const uint32_t *GetColumnBgra(unsigned int column, const Span **spans_out); + // Returns the whole texture, stored in column-major order virtual const BYTE *GetPixels () = 0; - + + // Returns the whole texture, stored in column-major order, in BGRA8 format + virtual const uint32_t *GetPixelsBgra(); + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -262,6 +269,9 @@ protected: Rotations = other->Rotations; } +private: + std::vector BgraPixels; + public: static void FlipSquareBlock (BYTE *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); From 24f846f702bb6f8fb57a4a6370f33267442e385c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 14:25:56 +0200 Subject: [PATCH 025/100] Bug fixes --- src/r_draw_rgba.cpp | 6 ++---- src/r_main.h | 21 +++++++++++++++++++++ 2 files 
changed, 23 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 528c3c986..fa632cb5d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1511,8 +1511,6 @@ public: { // 64x64 is the most common case by far, so special case it. - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - int sse_count = count / 4; count -= sse_count * 4; @@ -1545,7 +1543,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); @@ -1582,7 +1580,7 @@ public: // Lookup pixel from flat texture tile, // re-index using light/colormap. - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); SSE_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); diff --git a/src/r_main.h b/src/r_main.h index 6d0e2a21f..d71d44fe1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -217,6 +217,27 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + // Calculate constants for a simple shade #define 
SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ From 6c70eaea2f46cdd2625ad0b51bbcc194ac3200e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:56:50 +0200 Subject: [PATCH 026/100] Add jpeg bgra support --- src/textures/jpegtexture.cpp | 116 +++++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 5 deletions(-) diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 225396598..a37eff6c3 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,6 +187,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -195,9 +196,11 @@ public: protected: BYTE *Pixels; + std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); + void MakeTextureBgra (); friend class FTexture; }; @@ -295,11 +298,9 @@ FJPEGTexture::~FJPEGTexture () void FJPEGTexture::Unload () { - if (Pixels != NULL) - { - delete[] Pixels; - Pixels = NULL; - } + delete[] Pixels; + Pixels = NULL; + PixelsBgra.clear(); } //========================================================================== @@ -358,6 +359,15 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } +const uint32_t *FJPEGTexture::GetPixelsBgra() +{ + if (PixelsBgra.empty()) + { + MakeTextureBgra(); + } + return PixelsBgra.data(); +} + //========================================================================== // // @@ -457,6 +467,102 @@ void FJPEGTexture::MakeTexture () } } +void FJPEGTexture::MakeTextureBgra() +{ + FWadLump lump = Wads.OpenLumpNum(SourceLump); + JSAMPLE *buff = NULL; + + jpeg_decompress_struct cinfo; + jpeg_error_mgr jerr; + + PixelsBgra.resize(Width * Height, 0xffba0000); + + cinfo.err = jpeg_std_error(&jerr); + cinfo.err->output_message = 
JPEG_OutputMessage; + cinfo.err->error_exit = JPEG_ErrorExit; + jpeg_create_decompress(&cinfo); + try + { + FLumpSourceMgr sourcemgr(&lump, &cinfo); + jpeg_read_header(&cinfo, TRUE); + if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || + (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || + (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) + { + Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); + throw - 1; + } + + jpeg_start_decompress(&cinfo); + + int y = 0; + buff = new BYTE[cinfo.output_width * cinfo.output_components]; + + while (cinfo.output_scanline < cinfo.output_height) + { + int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); + BYTE *in = buff; + uint32_t *out = PixelsBgra.data() + y; + switch (cinfo.out_color_space) + { + case JCS_RGB: + for (int x = Width; x > 0; --x) + { + uint32_t r = in[0]; + uint32_t g = in[1]; + uint32_t b = in[2]; + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 3; + } + break; + + case JCS_GRAYSCALE: + for (int x = Width; x > 0; --x) + { + uint32_t gray = in[0]; + *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; + out += Height; + in += 1; + } + break; + + case JCS_CMYK: + // What are you doing using a CMYK image? :) + for (int x = Width; x > 0; --x) + { + // To be precise, these calculations should use 255, but + // 256 is much faster and virtually indistinguishable. + uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); + uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); + uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); + *out = 0xff000000 | (r << 16) | (g << 8) | b; + out += Height; + in += 4; + } + break; + + default: + // The other colorspaces were considered above and discarded, + // but GCC will complain without a default for them here. 
+ break; + } + y++; + } + jpeg_finish_decompress(&cinfo); + jpeg_destroy_decompress(&cinfo); + } + catch (int) + { + Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); + jpeg_destroy_decompress(&cinfo); + } + if (buff != NULL) + { + delete[] buff; + } +} + //=========================================================================== // From 103a6baac54d7d874021ad94f40098cc66d787dc Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 15:57:31 +0200 Subject: [PATCH 027/100] Support for drawing in multiple passes --- src/r_draw.h | 17 +++++-- src/r_draw_rgba.cpp | 105 +++++++++++++++++++++++++++++++++++++++---- src/r_drawt_rgba.cpp | 62 +++++++++++++------------ 3 files changed, 141 insertions(+), 43 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 3f97a7a65..409b7c01b 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -458,25 +458,34 @@ public: // Number of active threads int num_cores = 1; + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = 300; + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; // Checks if a line is rendered by this thread bool line_skipped_by_thread(int line) { - return line % num_cores != core; + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; } // The number of lines to skip to reach the first line to be rendered by this thread int skipped_by_thread(int first_line) { - return (num_cores - (first_line - core) % num_cores) % num_cores; + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; } // The number of lines to be rendered by this thread int count_for_thread(int first_line, int count) { - return (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - 
skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); } // Calculate the dest address for the first line to be rendered by this thread @@ -522,6 +531,8 @@ class DrawerCommandQueue bool no_threading = false; DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; void StartThreads(); void StopThreads(); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fa632cb5d..b81ee4cca 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -104,11 +104,19 @@ void DrawerCommandQueue::Finish() thread.core = 0; thread.num_cores = queue->threads.size() + 1; - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(&thread); + thread.pass_start_y = pass * queue->rows_in_pass; + thread.pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread.pass_end_y = MAX(thread.pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(&thread); + } } // Wait for everyone to finish: @@ -156,11 +164,19 @@ void DrawerCommandQueue::StartThreads() start_lock.unlock(); // Do the work: - size_t size = queue->active_commands.size(); - for (size_t i = 0; i < size; i++) + for (int pass = 0; pass < queue->num_passes; pass++) { - auto &command = queue->active_commands[i]; - command->Execute(thread); + thread->pass_start_y = pass * queue->rows_in_pass; + thread->pass_end_y = (pass + 1) * queue->rows_in_pass; + if (pass + 1 == queue->num_passes) + thread->pass_end_y = MAX(thread->pass_end_y, MAXHEIGHT); + + size_t size = queue->active_commands.size(); + for (size_t i = 0; i < size; i++) + { + auto &command = queue->active_commands[i]; + command->Execute(thread); + } } // Notify main thread that we finished: @@ -1611,6 +1627,79 @@ public: BYTE 
xshift = yshift - ds_xbits; int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + SSE_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + SSE_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + SSE_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index c2caec0c2..28c86d3f5 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1528,40 +1528,38 @@ public: fracstep = dc_iscale; frac = dc_texturefrac; - { - const BYTE *source = dc_source; + const BYTE *source = dc_source; - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); + if (count & 1) { + *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; } + if (count & 2) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest += 8; + } + if (count & 4) { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest += 16; + } + count >>= 3; + if (!count) return; + + do + { + dest[0] = source[frac >> FRACBITS]; frac += fracstep; + dest[4] = source[frac >> FRACBITS]; frac += fracstep; + dest[8] = 
source[frac >> FRACBITS]; frac += fracstep; + dest[12] = source[frac >> FRACBITS]; frac += fracstep; + dest[16] = source[frac >> FRACBITS]; frac += fracstep; + dest[20] = source[frac >> FRACBITS]; frac += fracstep; + dest[24] = source[frac >> FRACBITS]; frac += fracstep; + dest[28] = source[frac >> FRACBITS]; frac += fracstep; + dest += 32; + } while (--count); } }; From 27156eb60ad4d12e32ea24cf90437483c2373818 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 16:32:47 +0200 Subject: [PATCH 028/100] Linux compile fixes and missing variable declarations --- src/r_draw.h | 5 +++-- src/r_draw_rgba.cpp | 5 +++++ src/r_segs.cpp | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 409b7c01b..37a0e6778 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -28,6 +28,7 @@ #include #include #include +#include // Spectre/Invisibility. #define FUZZTABLE 50 @@ -460,7 +461,7 @@ public: // Range of rows processed this pass int pass_start_y = 0; - int pass_end_y = 300; + int pass_end_y = MAXHEIGHT; uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; uint32_t *dc_temp_rgba; @@ -527,7 +528,7 @@ class DrawerCommandQueue std::mutex end_mutex; std::condition_variable end_condition; - int finished_threads = 0; + size_t finished_threads = 0; bool no_threading = false; DrawerThread single_core_thread; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b81ee4cca..e2dbd443a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1726,6 +1726,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1828,6 +1829,7 @@ class DrawSpanTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -1949,6 +1951,7 @@ class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int 
ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2080,6 +2083,7 @@ class DrawSpanAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; @@ -2201,6 +2205,7 @@ class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand fixed_t ds_yfrac; BYTE *dc_destorg; int ds_x1; + int ds_x2; int ds_y1; int ds_y; fixed_t ds_xstep; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 87ce48ec4..c1d1ad744 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1184,7 +1184,7 @@ void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_heig // Find start uv in [0-uv_height[ range. // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - std::floor(v); + v = v - floor(v); v *= uv_height; v *= (1 << fracbits); From ffcfe0b54f19420b51284abcbfbec09f3aee9074 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 17:08:45 +0200 Subject: [PATCH 029/100] Fix warning --- src/textures/texture.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 1869491b1..d50081062 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -181,8 +181,6 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *pixels = GetPixelsBgra(); column %= Width; - if (column < 0) - column += Width; if (spans_out != nullptr) GetColumn(column, spans_out); From 07571da98ccad2fd2c360c4b73ba989a902f184b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 18:43:49 +0200 Subject: [PATCH 030/100] Improved how threaded rendering is handled --- src/r_draw.h | 16 ++++++++++++---- src/r_draw_rgba.cpp | 24 ++++++++++++++++++++++-- src/r_main.cpp | 4 +++- src/r_swrenderer.cpp | 3 ++- 4 files changed, 39 insertions(+), 8 deletions(-) diff 
--git a/src/r_draw.h b/src/r_draw.h index 37a0e6778..d192dc5e4 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -443,8 +443,11 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + // Wait until all drawers finished executing -void R_FinishDrawerCommands(); +void R_EndDrawerCommands(); class DrawerCommandQueue; @@ -530,13 +533,14 @@ class DrawerCommandQueue std::condition_variable end_condition; size_t finished_threads = 0; - bool no_threading = false; + int threaded_render = 0; DrawerThread single_core_thread; int num_passes = 2; int rows_in_pass = 540; void StartThreads(); void StopThreads(); + void Finish(); static DrawerCommandQueue *Instance(); @@ -551,7 +555,7 @@ public: static void QueueCommand(Types &&... args) { auto queue = Instance(); - if (queue->no_threading) + if (queue->threaded_render == 0) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); @@ -565,9 +569,13 @@ public: queue->commands.push_back(command); } } + + // Redirects all drawing commands to worker threads until Finish is called + // Begin/End blocks can be nested. 
+ static void Begin(); // Wait until all worker threads finished executing commands - static void Finish(); + static void End(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e2dbd443a..23ab106a6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -82,6 +82,21 @@ void* DrawerCommandQueue::AllocMemory(size_t size) return data; } +void DrawerCommandQueue::Begin() +{ + auto queue = Instance(); + queue->Finish(); + queue->threaded_render++; +} + +void DrawerCommandQueue::End() +{ + auto queue = Instance(); + queue->Finish(); + if (queue->threaded_render > 0) + queue->threaded_render--; +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); @@ -3515,9 +3530,14 @@ public: ///////////////////////////////////////////////////////////////////////////// -void R_FinishDrawerCommands() +void R_BeginDrawerCommands() { - DrawerCommandQueue::Finish(); + DrawerCommandQueue::Begin(); +} + +void R_EndDrawerCommands() +{ + DrawerCommandQueue::End(); } void R_DrawColumnP_RGBA_C() diff --git a/src/r_main.cpp b/src/r_main.cpp index 348c70120..c1b78303b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -960,6 +960,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, r_swtruecolor = canvas->IsBgra(); R_InitColumnDrawers(); } + + R_BeginDrawerCommands(); viewwidth = width; RenderTarget = canvas; @@ -979,7 +981,7 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 62190b606..11f879c38 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -162,10 +162,11 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); - R_FinishDrawerCommands(); + R_EndDrawerCommands(); } //========================================================================== From d5331e60951bbd4509ce1e2f5be13239c58a04d6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 10 Jun 2016 22:22:40 +0200 Subject: [PATCH 031/100] Wallscan fix --- src/r_draw_rgba.cpp | 8 +++---- src/r_segs.cpp | 53 ++++++++++++++++++++++----------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 23ab106a6..e3a64dd7a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3661,16 +3661,16 @@ void R_FillSpan_RGBA() DrawerCommandQueue::QueueCommand(); } -extern FTexture *rw_pic; // For the asserts below +//extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_RGBA() { - DWORD fracstep = dc_iscale; + /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; DWORD height = rw_pic->GetHeight(); assert((frac >> vlinebits) < height); - frac += dc_count * fracstep; - assert((frac >> vlinebits) <= height); + frac += (dc_count-1) * fracstep; + assert((frac >> vlinebits) <= height);*/ DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index c1d1ad744..451ddf986 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1087,23 +1087,21 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv { uint32_t uv_pos = uv_start; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { - int next_uv_wrap = (uv_max - uv_pos + uv_step - 1) / uv_step; - int count = MIN(left, next_uv_wrap); - if (count <= 0) - break; // This should never happen, but it does.. 
+ uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - } + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); left -= count; uv_pos += uv_step * count; @@ -1138,30 +1136,28 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste for (int i = 0; i < 4; i++) bufplce[i] = source[i]; - int left = y2 - y1; + uint32_t left = y2 - y1; while (left > 0) { // Find which column wraps first - int count = left; + uint32_t count = left; for (int i = 0; i < 4; i++) { - int next_uv_wrap = (uv_max - uv_pos[i] + uv_step[i] - 1) / uv_step[i]; + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; count = MIN(next_uv_wrap, count); } - if (count <= 0) - break; // This should never happen, but it does.. 
// Draw until that column wraps - if (count > 0) + for (int i = 0; i < 4; i++) { - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; } + dc_count = count; + draw4columns(); // Wrap the uv position for (int i = 0; i < 4; i++) @@ -1299,6 +1295,9 @@ void wallscan_any( { for (int i = 0; i < 4; i++) { + if (y2[i] <= y1[i]) + continue; + if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); From a6d696bbfd45c30bd7162ab948cdf4ba36fa170e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 00:50:36 +0200 Subject: [PATCH 032/100] Undo removal of wallscan_np2 and wallscan_np2_ds --- src/r_segs.cpp | 131 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 16 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 451ddf986..ad242b2f9 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1072,16 +1072,17 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv if (uv_max == 0) // power of two { int count = y2 - y1; - if (count > 0) - { - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - } - return uv_start + uv_step * (uint32_t)count; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); } else { @@ -1119,15 +1120,19 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed { + int count = y2 - y1; for (int i = 0; i < 4; i++) { bufplce[i] = source[i]; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; - uv_pos[i] += uv_step[i] * (y2 - y1); + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = y2 - y1; + dc_count = count; draw4columns(); } else @@ -1481,16 +1486,110 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. +//============================================================================= +// +// wallscan_np2 +// +// This is a wrapper around wallscan that helps it tile textures whose heights +// are not powers of 2. It divides the wall into texture-sized strips and calls +// wallscan for each of those. Since only one repetition of the texture fits +// in each strip, wallscan will not tile. 
+// +//============================================================================= + void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, double top, double bot, bool mask) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + if (!r_np2) + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, mask); + } + else + { + short most1[MAXWIDTH], most2[MAXWIDTH], most3[MAXWIDTH]; + short *up, *down; + double texheight = rw_pic->GetHeight(); + double partition; + double scaledtexheight = texheight / yrepeat; + + if (yrepeat >= 0) + { // normal orientation: draw strips from top to bottom + partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + if (partition == top) + { + partition -= scaledtexheight; + } + up = uwal; + down = most1; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition > bot) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 3) + { + for (int j = x1; j < x2; ++j) + { + down[j] = clamp(most3[j], up[j], dwal[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + up = down; + down = (down == most1) ? most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); + } + else + { // upside down: draw strips from bottom to top + partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + up = most1; + down = dwal; + dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + while (partition < top) + { + int j = OWallMost(most3, partition - ViewPos.Z, &WallC); + if (j != 12) + { + for (int j = x1; j < x2; ++j) + { + up[j] = clamp(most3[j], uwal[j], down[j]); + } + call_wallscan(x1, x2, up, down, swal, lwal, yrepeat, mask); + down = up; + up = (up == most1) ? 
most2 : most1; + } + partition -= scaledtexheight; + dc_texturemid -= texheight; + } + call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); + } + } } -// wallscan now tiles with non-power-of-two textures - this function is therefore not needed anymore.. static void wallscan_np2_ds(drawseg_t *ds, int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { - call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + if (rw_pic->GetHeight() != 1 << rw_pic->HeightBits) + { + double frontcz1 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v1); + double frontfz1 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v1); + double frontcz2 = ds->curline->frontsector->ceilingplane.ZatPoint(ds->curline->v2); + double frontfz2 = ds->curline->frontsector->floorplane.ZatPoint(ds->curline->v2); + double top = MAX(frontcz1, frontcz2); + double bot = MIN(frontfz1, frontfz2); + if (fake3D & FAKE3D_CLIPTOP) + { + top = MIN(top, sclipTop); + } + if (fake3D & FAKE3D_CLIPBOTTOM) + { + bot = MAX(bot, sclipBottom); + } + wallscan_np2(x1, x2, uwal, dwal, swal, lwal, yrepeat, top, bot, true); + } + else + { + call_wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, true); + } } // From 5ae8e9e8c2a68fb55aab598ba46ad86762fc8806 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 16:17:30 +0200 Subject: [PATCH 033/100] Fix missing colormap lookup --- src/r_draw.cpp | 69 +++++++++++++-------------- src/r_draw_rgba.cpp | 28 ++++++----- src/r_drawt_rgba.cpp | 108 +++++++++++++++++++++++++------------------ 3 files changed, 106 insertions(+), 99 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 2710b9992..70b3893f4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2793,24 +2793,19 @@ bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()) void R_SetTranslationMap(lighttable_t *translation) { dc_fcolormap = nullptr; - dc_shade_constants.light_red = 256; - dc_shade_constants.light_green 
= 256; - dc_shade_constants.light_blue = 256; - dc_shade_constants.light_alpha = 256; - dc_shade_constants.fade_red = 0; - dc_shade_constants.fade_green = 0; - dc_shade_constants.fade_blue = 0; - dc_shade_constants.fade_alpha = 256; - dc_shade_constants.desaturate = 0; - dc_shade_constants.simple_shade = true; + dc_colormap = translation; if (r_swtruecolor) { - dc_colormap = translation; - dc_light = 0; - } - else - { - dc_colormap = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; dc_light = 0; } } @@ -2818,49 +2813,47 @@ void R_SetTranslationMap(lighttable_t *translation) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) { dc_fcolormap = base_colormap; - dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; - dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; - dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; - dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; - dc_shade_constants.fade_red = dc_fcolormap->Fade.r; - dc_shade_constants.fade_green = dc_fcolormap->Fade.g; - dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; - dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; - dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; - dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); if (r_swtruecolor) { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + 
dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); dc_colormap = base_colormap->Maps; dc_light = LIGHTSCALE(light, shade); } else { dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - dc_light = 0; } } void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) { ds_fcolormap = base_colormap; - ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; - ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; - ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; - ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; - ds_shade_constants.fade_red = ds_fcolormap->Fade.r; - ds_shade_constants.fade_green = ds_fcolormap->Fade.g; - ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; - ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; - ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; - ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); if (r_swtruecolor) { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = 
ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); ds_colormap = base_colormap->Maps; ds_light = LIGHTSCALE(light, shade); } else { ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); - ds_light = 0; } } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index e3a64dd7a..979dc0743 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -229,6 +229,7 @@ class DrawColumnRGBACommand : public DrawerCommand const BYTE *dc_source; int dc_pitch; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: DrawColumnRGBACommand() @@ -241,6 +242,7 @@ public: dc_source = ::dc_source; dc_pitch = ::dc_pitch; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -267,24 +269,20 @@ public: fracstep = dc_iscale * thread->num_cores; frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + // [RH] Get local copies of these variables so that the compiler + // has a better chance of optimizing this well. + const BYTE *source = dc_source; + int pitch = dc_pitch * thread->num_cores; + BYTE *colormap = dc_colormap; + + do { - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - // Inner loop that does the actual texture mapping, - // e.g. a DDA-lile scaling. - // This is as fast as it gets. 
- do - { - *dest = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + dest += pitch; + frac += fracstep; - dest += pitch; - frac += fracstep; - - } while (--count); - } + } while (--count); } }; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 28c86d3f5..5f0fc4156 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -90,13 +90,13 @@ public: sincr = thread->num_cores * 4; if (count & 1) { - *dest = *source; + *dest = GPalette.BaseColors[*source]; source += sincr; dest += pitch; } if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[sincr]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; source += sincr * 2; dest += pitch * 2; } @@ -104,10 +104,10 @@ public: return; do { - dest[0] = source[0]; - dest[pitch] = source[sincr]; - dest[pitch * 2] = source[sincr * 2]; - dest[pitch * 3] = source[sincr * 3]; + dest[0] = GPalette.BaseColors[source[0]]; + dest[pitch] = GPalette.BaseColors[source[sincr]]; + dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; + dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; source += sincr * 4; dest += pitch * 4; } while (--count); @@ -124,6 +124,7 @@ class RtMap1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,6 +138,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -158,9 +160,11 @@ public: source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - *dest = shade_pal_index(*source, light, shade_constants); + *dest = shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } @@ 
-168,8 +172,8 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -185,6 +189,7 @@ class RtMap4colsRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; BYTE *dc_destorg; int dc_pitch; + BYTE *dc_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -197,6 +202,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_destorg = ::dc_destorg; dc_pitch = ::dc_pitch; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -219,12 +225,14 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (count & 1) { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; } @@ -232,14 +240,14 @@ public: return; do { - dest[0] = shade_pal_index(source[0], light, shade_constants); - dest[1] = shade_pal_index(source[1], light, shade_constants); - dest[2] = shade_pal_index(source[2], light, shade_constants); - dest[3] = shade_pal_index(source[3], light, shade_constants); - dest[pitch] = shade_pal_index(source[sincr], light, shade_constants); - dest[pitch + 1] = shade_pal_index(source[sincr 
+ 1], light, shade_constants); - dest[pitch + 2] = shade_pal_index(source[sincr + 2], light, shade_constants); - dest[pitch + 3] = shade_pal_index(source[sincr + 3], light, shade_constants); + dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -265,16 +273,18 @@ public: source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; pitch = dc_pitch * thread->num_cores; sincr = thread->num_cores * 4; + + BYTE *colormap = dc_colormap; if (shade_constants.simple_shade) { SSE_SHADE_SIMPLE_INIT(light); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -290,10 +300,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ -302,10 +312,10 @@ public: // 
shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE_SIMPLE(fg); @@ -321,10 +331,10 @@ public: SSE_SHADE_INIT(light, shade_constants); if (count & 1) { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); @@ -340,10 +350,10 @@ public: do { // shade_pal_index 0-3 { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -352,10 +362,10 @@ public: // shade_pal_index 4-7 (pitch) { - uint32_t p0 = source[sincr]; - uint32_t p1 = source[sincr + 1]; - uint32_t p2 = source[sincr + 2]; - uint32_t p3 = source[sincr + 3]; + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); SSE_SHADE(fg, shade_constants); @@ -1800,6 +1810,9 @@ void rt_span_coverage_rgba(int x, int start, int stop) // drawn to the screen along with up to three other columns. 
void R_DrawColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; @@ -1812,6 +1825,9 @@ void R_DrawColumnHorizP_RGBA_C (void) // [RH] Just fills a column with a given color void R_FillColumnHorizP_RGBA_C (void) { + if (dc_count <= 0) + return; + int x = dc_x & 3; unsigned int **span = &dc_ctspan[x]; (*span)[0] = dc_yl; From 40b76dc9b0cd8d59c0f2c597cc4a690cb78ab89e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 11 Jun 2016 18:41:56 +0200 Subject: [PATCH 034/100] Apply gamma when using true color output on Linux and Mac --- src/posix/cocoa/i_video.mm | 5 +-- src/posix/sdl/sdlvideo.cpp | 12 +------ src/v_video.cpp | 67 ++++++++++++++++++++++++++++++++++++-- src/v_video.h | 1 + 4 files changed, 68 insertions(+), 17 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 425fe5887..ddfccaa57 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -869,10 +869,7 @@ void CocoaFrameBuffer::Update() if (IsBgra()) { - for (int y = 0; y < Height; y++) - { - memcpy((uint32_t*)m_pixelBuffer + y * Width, (uint32_t*)MemBuffer + y * Pitch, Width * BYTES_PER_PIXEL); - } + CopyWithGammaBgra(m_pixelBuffer, Width * BYTES_PER_PIXEL, m_gammaTable[0], m_gammaTable[1], m_gammaTable[2], m_flashColor, m_flashAmount); } else { diff --git a/src/posix/sdl/sdlvideo.cpp b/src/posix/sdl/sdlvideo.cpp index 26121aa71..56b883978 100644 --- a/src/posix/sdl/sdlvideo.cpp +++ b/src/posix/sdl/sdlvideo.cpp @@ -497,17 +497,7 @@ void SDLFB::Update () if (Bgra) { - if (pitch == Pitch * 4) - { - memcpy(pixels, MemBuffer, Width*Height*4); - } - else - { - for (int y = 0; y < Height; ++y) - { - memcpy((BYTE *)pixels + y*pitch, MemBuffer + y*Pitch*4, Width*4); - } - } + CopyWithGammaBgra(pixels, pitch, GammaTable[0], GammaTable[1], GammaTable[2], Flash, FlashAmount); } else if (NotPaletted) { diff --git a/src/v_video.cpp b/src/v_video.cpp index bc99edbf1..e58638121 
 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -742,13 +742,12 @@ void DCanvas::CalcGamma (float gamma, BYTE gammalookup[256]) // I found this formula on the web at // , // but that page no longer exits. - double invgamma = 1.f / gamma; int i; for (i = 0; i < 256; i++) { - gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma)); + gammalookup[i] = (BYTE)(255.0 * pow (i / 255.0, invgamma) + 0.5); } } @@ -876,6 +875,70 @@ DFrameBuffer::DFrameBuffer (int width, int height, bool bgra) Accel2D = false; } +//========================================================================== +// +// DFrameBuffer :: CopyWithGammaBgra +// +// Copies data to destination buffer while performing gamma and flash. +// This is only needed if a target cannot do this with shaders. +// +//========================================================================== + +void DFrameBuffer::CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount) +{ + const BYTE *gammatables[3] = { gammared, gammagreen, gammablue }; + + if (flash_amount > 0) + { + uint16_t inv_flash_amount = 256 - flash_amount; + uint16_t flash_red = flash.r * flash_amount; + uint16_t flash_green = flash.g * flash_amount; + uint16_t flash_blue = flash.b * flash_amount; + + for (int y = 0; y < Height; y++) + { + BYTE *dest = (BYTE*)output + y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + uint16_t fg_red = src[2]; + uint16_t fg_green = src[1]; + uint16_t fg_blue = src[0]; + uint16_t red = (fg_red * inv_flash_amount + flash_red) >> 8; + uint16_t green = (fg_green * inv_flash_amount + flash_green) >> 8; + uint16_t blue = (fg_blue * inv_flash_amount + flash_blue) >> 8; + + dest[0] = gammatables[2][blue]; + dest[1] = gammatables[1][green]; + dest[2] = gammatables[0][red]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } + else + { + for (int y = 0; y < Height; y++) + { + BYTE *dest = 
(BYTE*)output + y * pitch; + BYTE *src = MemBuffer + y * Pitch * 4; + for (int x = 0; x < Width; x++) + { + dest[0] = gammatables[2][src[0]]; + dest[1] = gammatables[1][src[1]]; + dest[2] = gammatables[0][src[2]]; + dest[3] = 0xff; + + dest += 4; + src += 4; + } + } + } +} + + //========================================================================== // // DFrameBuffer :: DrawRateStuff diff --git a/src/v_video.h b/src/v_video.h index 120beff9a..19213bd26 100644 --- a/src/v_video.h +++ b/src/v_video.h @@ -420,6 +420,7 @@ public: protected: void DrawRateStuff (); void CopyFromBuff (BYTE *src, int srcPitch, int width, int height, BYTE *dest); + void CopyWithGammaBgra(void *output, int pitch, const BYTE *gammared, const BYTE *gammagreen, const BYTE *gammablue, PalEntry flash, int flash_amount); DFrameBuffer () {} From 42efc7334e5e5dc0419b020b4db36777bc647be4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 00:50:43 +0200 Subject: [PATCH 035/100] Fix missing particles in true color mode --- src/r_draw.h | 7 +++++-- src/r_draw_rgba.cpp | 5 +++++ src/r_things.cpp | 2 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index d192dc5e4..55ad8a0ca 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -570,12 +570,15 @@ public: } } - // Redirects all drawing commands to worker threads until Finish is called + // Redirects all drawing commands to worker threads until End is called // Begin/End blocks can be nested. 
static void Begin(); - // Wait until all worker threads finished executing commands + // End redirection and wait until all worker threads finished executing static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); }; #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 979dc0743..af8487964 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -97,6 +97,11 @@ void DrawerCommandQueue::End() queue->threaded_render--; } +void DrawerCommandQueue::WaitForWorkers() +{ + Instance()->Finish(); +} + void DrawerCommandQueue::Finish() { auto queue = Instance(); diff --git a/src/r_things.cpp b/src/r_things.cpp index 933d50e46..0c5e17b7c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2686,6 +2686,8 @@ void R_DrawParticle_RGBA(vissprite_t *vis) int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); + + DrawerCommandQueue::WaitForWorkers(); uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; From 350857a9f6b2e6aa0a0da9f6eb27c2b05066c80e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 19:19:44 +0200 Subject: [PATCH 036/100] Fixed fuzz effect when using multiple cores --- src/r_draw_rgba.cpp | 89 ++++++++++++--------------------------------- 1 file changed, 23 insertions(+), 66 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index af8487964..a5d924dfa 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -583,79 +583,36 @@ public: dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); - // Note: this implementation assumes this function is only used for the pinky shadow effect (i.e. no other fancy colormap than black) - // I'm not sure if this is really always the case or not. 
+ int pitch = dc_pitch * thread->num_cores; + int fuzzstep = thread->num_cores; + int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + while (count > 0) { - // [RH] Make local copies of global vars to try and improve - // the optimizations made by the compiler. - int pitch = dc_pitch * thread->num_cores; - int fuzz = fuzzpos; - int cnt; + int available = (FUZZTABLE - fuzz); + int next_wrap = available / fuzzstep; + if (available % fuzzstep != 0) + next_wrap++; - // [RH] Split this into three separate loops to minimize - // the number of times fuzzpos needs to be clamped. - if (fuzz) + int cnt = MIN(count, next_wrap); + count -= cnt; + do { - cnt = MIN(FUZZTABLE - fuzz, count); - count -= cnt; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - if (fuzz == FUZZTABLE || count > 0) - { - while (count >= FUZZTABLE) - { - fuzz = 0; - cnt = FUZZTABLE; - count -= FUZZTABLE; - do - { - uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + } while (--cnt); - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--cnt); - } - fuzz = 0; - if (count > 0) - { - do - { - 
uint32_t bg = dest[fuzzoffset[fuzz++]]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); - } - } - fuzzpos = fuzz; + fuzz %= FUZZTABLE; } } }; From 0f0859b0b2d8f82c89ea1674b6ecc999934ae659 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 12 Jun 2016 22:54:23 +0200 Subject: [PATCH 037/100] Special colormap support for when no hw accel is available --- src/r_draw.h | 18 +++++ src/r_draw_rgba.cpp | 152 +++++++++++++++++++++++++++++++++++++++++++ src/r_main.cpp | 4 +- src/r_swrenderer.cpp | 8 +++ src/r_things.cpp | 2 +- 5 files changed, 181 insertions(+), 3 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 55ad8a0ca..d09d0ab89 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -581,4 +581,22 @@ public: static void WaitForWorkers(); }; +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a5d924dfa..d5c275d0e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3488,6 +3488,158 @@ public: } }; +ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) +{ + buffer = screen->GetBuffer(); + pitch = screen->GetPitch(); + width = screen->GetWidth(); + height = screen->GetHeight(); + + start_red = (int)(colormap->ColorizeStart[0] * 255); + start_green = (int)(colormap->ColorizeStart[1] * 255); + start_blue = (int)(colormap->ColorizeStart[2] * 255); + 
end_red = (int)(colormap->ColorizeEnd[0] * 255); + end_green = (int)(colormap->ColorizeEnd[1] * 255); + end_blue = (int)(colormap->ColorizeEnd[2] * 255); +} + +#ifdef NO_SSE +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + for (int x = 0; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + y += thread->num_cores; + count--; + } +} +#else +void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) +{ + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); + __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); + while (count > 0) + { + BYTE *pixels = buffer + y * pitch * 4; + int sse_length = width / 4; + for (int x = 0; x < sse_length; x++) + { + // Unpack to integers: + __m128i p = _mm_loadu_si128((const __m128i*)pixels); + + __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); + __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); + + // Add gray weighting to colors + __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); + __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); + __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_1 = 
_mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); + __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); + + // Transpose to get color components in individual vectors: + __m128 tmpx = _mm_castsi128_ps(p32_0); + __m128 tmpy = _mm_castsi128_ps(p32_1); + __m128 tmpz = _mm_castsi128_ps(p32_2); + __m128 tmpw = _mm_castsi128_ps(p32_3); + _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); + __m128i blue = _mm_castps_si128(tmpx); + __m128i green = _mm_castps_si128(tmpy); + __m128i red = _mm_castps_si128(tmpz); + __m128i alpha = _mm_castps_si128(tmpw); + + // Calculate gray and 256-gray values: + __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); + __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); + + // p32 = start * inv_gray + end * gray: + __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); + __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); + __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); + __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); + __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); + __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); + __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); + __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); + __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); + __m128i 
gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); + __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); + __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); + p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); + p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); + p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); + p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); + + p16_0 = _mm_packs_epi32(p32_0, p32_1); + p16_1 = _mm_packs_epi32(p32_2, p32_3); + p = _mm_packus_epi16(p16_0, p16_1); + + _mm_storeu_si128((__m128i*)pixels, p); + pixels += 16; + } + + for (int x = sse_length * 4; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (BYTE)blue; + pixels[1] = (BYTE)green; + pixels[2] = (BYTE)red; + pixels[3] = 0xff; + + pixels += 4; + } + + y += thread->num_cores; + count--; + } +} +#endif + ///////////////////////////////////////////////////////////////////////////// void R_BeginDrawerCommands() diff --git a/src/r_main.cpp b/src/r_main.cpp index c1b78303b..2eb0ce141 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -459,7 +459,7 @@ void R_SetupColormap(player_t *player) if (player->fixedcolormap >= 0 && player->fixedcolormap < 
(int)SpecialColormaps.Size()) { realfixedcolormap = &SpecialColormaps[player->fixedcolormap]; - if (RenderTarget == screen && (DFrameBuffer *)screen->Accel2D && r_shadercolormaps) + if (RenderTarget == screen && (r_swtruecolor || ((DFrameBuffer *)screen->Accel2D && r_shadercolormaps))) { // Render everything fullbright. The copy to video memory will // apply the special colormap, so it won't be restricted to the @@ -935,7 +935,7 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // If we don't want shadered colormaps, NULL it now so that the // copy to the screen does not use a special colormap shader. - if (!r_shadercolormaps) + if (!r_shadercolormaps && !r_swtruecolor) { realfixedcolormap = NULL; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 11f879c38..c4558bf7c 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -43,6 +43,7 @@ #include "textures/textures.h" #include "r_data/voxels.h" +EXTERN_CVAR(Bool, r_shadercolormaps) class FArchive; void R_SWRSetWindow(int windowSize, int fullWidth, int fullHeight, int stHeight, int trueratio); @@ -166,6 +167,13 @@ void FSoftwareRenderer::RenderView(player_t *player) R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
FCanvasTextureInfo::UpdateAll (); + + // Apply special colormap if the target cannot do it + if (realfixedcolormap && r_swtruecolor && !(r_shadercolormaps && screen->Accel2D)) + { + DrawerCommandQueue::QueueCommand<ApplySpecialColormapRGBACommand>(realfixedcolormap, screen); + } + R_EndDrawerCommands(); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0c5e17b7c..2dc0bdb6c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1423,7 +1423,7 @@ void R_DrawPSprite (pspdef_t* psp, int pspnum, AActor *owner, double sx, double } } - if (realfixedcolormap != NULL) + if (realfixedcolormap != NULL && (!r_swtruecolor || (r_shadercolormaps && screen->Accel2D))) { // fixed color vis->Style.BaseColormap = realfixedcolormap; vis->Style.ColormapNum = 0; From cc10c2a97045010453c610e68e4a66cef6e36dd9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 03:16:48 +0200 Subject: [PATCH 038/100] Fix cameras and kdizd intro for true color mode --- src/r_main.cpp | 4 +-- src/r_swrenderer.cpp | 35 +++++++++++++++++++----- src/textures/canvastexture.cpp | 49 ++++++++++++++++++++++++++++++++-- src/textures/texture.cpp | 36 +++++++++++++++++++++++++ src/textures/textures.h | 18 +++++++++---- 5 files changed, 126 insertions(+), 16 deletions(-) diff --git a/src/r_main.cpp b/src/r_main.cpp index 2eb0ce141..4e5ff1dbd 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -974,6 +974,8 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_RenderActorView (actor, dontmaplines); + R_EndDrawerCommands(); + RenderTarget = screen; bRenderingToCanvas = false; R_ExecuteSetViewSize (); @@ -981,8 +983,6 @@ void R_RenderViewToCanvas (AActor *actor, DCanvas *canvas, R_SetupBuffer (); screen->Unlock (); - R_EndDrawerCommands(); - viewactive = savedviewactive; r_swtruecolor = savedoutputformat; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4558bf7c..556323df5 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,11 +87,17 @@ void FSoftwareRenderer::PrecacheTexture(FTexture 
*tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - tex->GetColumn(0, &spanp); + /*if (r_swtruecolor) + tex->GetColumnBgra(0, &spanp); + else*/ + tex->GetColumn(0, &spanp); } else if (cache != 0) { - tex->GetPixels (); + if (r_swtruecolor) + tex->GetPixels(); + else + tex->GetPixels (); } else { @@ -328,8 +334,8 @@ void FSoftwareRenderer::CopyStackedViewParameters() void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoint, int fov) { - BYTE *Pixels = const_cast(tex->GetPixels()); - DSimpleCanvas *Canvas = tex->GetCanvas(); + BYTE *Pixels = r_swtruecolor ? (BYTE*)tex->GetPixelsBgra() : (BYTE*)tex->GetPixels(); + DSimpleCanvas *Canvas = r_swtruecolor ? tex->GetCanvasBgra() : tex->GetCanvas(); // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. @@ -340,13 +346,28 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin R_SetFOV ((double)fov); R_RenderViewToCanvas (viewpoint, Canvas, 0, 0, tex->GetWidth(), tex->GetHeight(), tex->bFirstUpdate); R_SetFOV (savedfov); - if (Pixels == Canvas->GetBuffer()) + + if (Canvas->IsBgra()) { - FTexture::FlipSquareBlockRemap (Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockBgra((uint32_t*)Pixels, tex->GetWidth(), tex->GetHeight()); + } + else + { + FTexture::FlipNonSquareBlockBgra((uint32_t*)Pixels, (const uint32_t*)Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch()); + } } else { - FTexture::FlipNonSquareBlockRemap (Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + if (Pixels == Canvas->GetBuffer()) + { + FTexture::FlipSquareBlockRemap(Pixels, tex->GetWidth(), tex->GetHeight(), GPalette.Remap); + } + else + { + 
FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); + } } tex->SetUpdated(); fixedcolormap = savecolormap; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index 7242149a4..a72546d78 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -53,7 +53,6 @@ FCanvasTexture::FCanvasTexture (const char *name, int width, int height) DummySpans[1].TopOffset = 0; DummySpans[1].Length = 0; UseType = TEX_Wall; - Canvas = NULL; bNeedsUpdate = true; bDidUpdate = false; bHasCanvas = true; @@ -101,6 +100,16 @@ const BYTE *FCanvasTexture::GetPixels () return Pixels; } +const uint32_t *FCanvasTexture::GetPixelsBgra() +{ + bNeedsUpdate = true; + if (CanvasBgra == NULL) + { + MakeTextureBgra(); + } + return PixelsBgra; +} + void FCanvasTexture::MakeTexture () { Canvas = new DSimpleCanvas (Width, Height, false); @@ -123,21 +132,57 @@ void FCanvasTexture::MakeTexture () memset (Pixels+Width*Height/2, 255, Width*Height/2); } +void FCanvasTexture::MakeTextureBgra() +{ + CanvasBgra = new DSimpleCanvas(Width, Height, true); + CanvasBgra->Lock(); + GC::AddSoftRoot(CanvasBgra); + + if (Width != Height || Width != CanvasBgra->GetPitch()) + { + PixelsBgra = new uint32_t[Width*Height]; + bPixelsAllocatedBgra = true; + } + else + { + PixelsBgra = (uint32_t*)CanvasBgra->GetBuffer(); + bPixelsAllocatedBgra = false; + } + + // Draw a special "unrendered" initial texture into the buffer. 
+ memset(PixelsBgra, 0, Width*Height / 2 * 4); + memset(PixelsBgra + Width*Height / 2, 255, Width*Height / 2 * 4); +} + void FCanvasTexture::Unload () { if (bPixelsAllocated) { - if (Pixels != NULL) delete [] Pixels; + if (Pixels != NULL) delete[] Pixels; bPixelsAllocated = false; Pixels = NULL; } + if (bPixelsAllocatedBgra) + { + if (PixelsBgra != NULL) delete[] PixelsBgra; + bPixelsAllocatedBgra = false; + PixelsBgra = NULL; + } + if (Canvas != NULL) { GC::DelSoftRoot(Canvas); Canvas->Destroy(); Canvas = NULL; } + + if (CanvasBgra != NULL) + { + GC::DelSoftRoot(CanvasBgra); + CanvasBgra->Destroy(); + CanvasBgra = NULL; + } } bool FCanvasTexture::CheckModified () diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index d50081062..28a3b9333 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -410,6 +410,29 @@ void FTexture::FlipSquareBlock (BYTE *block, int x, int y) } } +void FTexture::FlipSquareBlockBgra(uint32_t *block, int x, int y) +{ + int i, j; + + if (x != y) return; + + for (i = 0; i < x; ++i) + { + uint32_t *corner = block + x*i + i; + int count = x - i; + if (count & 1) + { + count--; + swapvalues(corner[count], corner[count*x]); + } + for (j = 0; j < count; j += 2) + { + swapvalues(corner[j], corner[j*x]); + swapvalues(corner[j + 1], corner[(j + 1)*x]); + } + } +} + void FTexture::FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap) { int i, j; @@ -453,6 +476,19 @@ void FTexture::FlipNonSquareBlock (BYTE *dst, const BYTE *src, int x, int y, int } } +void FTexture::FlipNonSquareBlockBgra(uint32_t *dst, const uint32_t *src, int x, int y, int srcpitch) +{ + int i, j; + + for (i = 0; i < x; ++i) + { + for (j = 0; j < y; ++j) + { + dst[i*y + j] = src[i + j*srcpitch]; + } + } +} + void FTexture::FlipNonSquareBlockRemap (BYTE *dst, const BYTE *src, int x, int y, int srcpitch, const BYTE *remap) { int i, j; diff --git a/src/textures/textures.h b/src/textures/textures.h index 0d066eff5..872c83b1a 100644 --- 
a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,8 +274,10 @@ private: public: static void FlipSquareBlock (BYTE *block, int x, int y); + static void FlipSquareBlockBgra (uint32_t *block, int x, int y); static void FlipSquareBlockRemap (BYTE *block, int x, int y, const BYTE *remap); static void FlipNonSquareBlock (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch); + static void FlipNonSquareBlockBgra (uint32_t *blockto, const uint32_t *blockfrom, int x, int y, int srcpitch); static void FlipNonSquareBlockRemap (BYTE *blockto, const BYTE *blockfrom, int x, int y, int srcpitch, const BYTE *remap); friend class D3DTex; @@ -518,21 +520,27 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); void NeedUpdate() { bNeedsUpdate=true; } void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } + DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } void MakeTexture (); + void MakeTextureBgra (); protected: - DSimpleCanvas *Canvas; - BYTE *Pixels; + DSimpleCanvas *Canvas = nullptr; + DSimpleCanvas *CanvasBgra = nullptr; + BYTE *Pixels = nullptr; + uint32_t *PixelsBgra = nullptr; Span DummySpans[2]; - bool bNeedsUpdate; - bool bDidUpdate; - bool bPixelsAllocated; + bool bNeedsUpdate = true; + bool bDidUpdate = false; + bool bPixelsAllocated = false; + bool bPixelsAllocatedBgra = false; public: bool bFirstUpdate; From e31331bed265925a2e03d66658863e9c26f2ca26 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 19:09:48 +0200 Subject: [PATCH 039/100] Sloped plane adjustments --- src/r_draw.cpp | 4 +- src/r_plane.cpp | 126 +++++------------------------------------------- 2 files changed, 14 insertions(+), 116 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 70b3893f4..ec7313c4f 100644 --- a/src/r_draw.cpp +++ 
b/src/r_draw.cpp @@ -2325,7 +2325,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapColoredPlane_RGBA; + R_MapTiltedPlane = R_MapTiltedPlane_RGBA; R_MapColoredPlane = R_MapColoredPlane_RGBA; R_DrawParticle = R_DrawParticle_RGBA; @@ -2422,7 +2422,7 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapColoredPlane_C; + R_MapTiltedPlane = R_MapTiltedPlane_C; R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 05fce79a6..1cde16071 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,124 +480,22 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_RGBA (int y, int x1) { int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - uint32_t *fb; - DWORD u, v; - int i; - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) + uint32_t *source = (uint32_t*)ds_source; + int source_width = 1 << ds_xbits; + int source_height = 1 << ds_ybits; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); + *(dest++) = source[0]; + count--; } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif } //========================================================================== From 3ce2d8365dd6a91c068a20b0caf4b683634ceba3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 20:01:31 +0200 Subject: [PATCH 040/100] Fix HUD colors when hw2d is off --- src/r_draw_rgba.cpp | 9 ++++++--- src/r_drawt_rgba.cpp | 19 +++++++++++++------ src/v_draw.cpp | 13 +++++++++---- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d5c275d0e..7e9f85117 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -325,11 +325,11 @@ public: { int pitch = dc_pitch * thread->num_cores; - BYTE color = dc_color; + uint32_t color = shade_pal_index_simple(dc_color, light); do { - *dest = shade_pal_index_simple(color, light); + *dest = color; dest += pitch; } while (--count); } @@ -629,6 +629,7 @@ class DrawAddColumnRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: DrawAddColumnRGBACommand() @@ -643,6 +644,7 @@ public: dc_shade_constants = 
::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -667,13 +669,14 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 5f0fc4156..cd124ac63 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -512,6 +512,7 @@ class RtAdd1colRGBACommand : public DrawerCommand ShadeConstants dc_shade_constants; fixed_t dc_srcalpha; fixed_t dc_destalpha; + BYTE *dc_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -527,6 +528,7 @@ public: dc_shade_constants = ::dc_shade_constants; dc_srcalpha = ::dc_srcalpha; dc_destalpha = ::dc_destalpha; + dc_colormap = ::dc_colormap; } void Execute(DrawerThread *thread) override @@ -548,12 +550,13 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -583,6 +586,7 @@ class RtAdd4colsRGBACommand : public DrawerCommand int dc_pitch; fixed_t dc_light; ShadeConstants dc_shade_constants; + BYTE *dc_colormap; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) 
@@ -595,6 +599,7 @@ public: dc_pitch = ::dc_pitch; dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; + dc_colormap = ::dc_colormap; } #ifdef NO_SSE @@ -617,6 +622,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); ShadeConstants shade_constants = dc_shade_constants; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -624,7 +630,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -664,6 +670,7 @@ public: uint32_t light = calc_light_multiplier(dc_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = dc_colormap; uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); @@ -678,10 +685,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index c2dbf31c5..d03853c11 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1095,9 +1095,10 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real } if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; for (int i = 0; i <= deltaX; i++) - spot[i] = palColor; + spot[i] = fillColor; } else { @@ -1108,11 +1109,12 @@ void 
DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // vertical line if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int pitch = GetPitch(); do { - *spot = palColor; + *spot = fillColor; spot += pitch; } while (--deltaY != 0); } @@ -1131,11 +1133,12 @@ void DCanvas::DrawLine(int x0, int y0, int x1, int y1, int palColor, uint32 real { // diagonal line. if (IsBgra()) { + uint32_t fillColor = GPalette.BaseColors[palColor].d; uint32_t *spot = (uint32_t*)GetBuffer() + y0*GetPitch() + x0; int advance = GetPitch() + xDir; do { - *spot = palColor; + *spot = fillColor; spot += advance; } while (--deltaY != 0); } @@ -1299,12 +1302,14 @@ void DCanvas::Clear (int left, int top, int right, int bottom, int palcolor, uin if (IsBgra()) { + uint32_t fill_color = GPalette.BaseColors[palcolor]; + uint32_t *dest = (uint32_t*)Buffer + top * Pitch + left; x = right - left; for (y = top; y < bottom; y++) { for (int i = 0; i < x; i++) - dest[i] = palcolor; + dest[i] = fill_color; dest += Pitch; } } From 8ba6a4f17501e34db5b644567fcd40e06502017c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 21:39:55 +0200 Subject: [PATCH 041/100] Precache, Unload and FillSimplePoly bug fix --- src/g_strife/strife_sbar.cpp | 5 ----- src/menu/playerdisplay.cpp | 5 ----- src/r_swrenderer.cpp | 4 ++-- src/textures/automaptexture.cpp | 1 + src/textures/buildtexture.cpp | 12 ------------ src/textures/canvastexture.cpp | 2 ++ src/textures/ddstexture.cpp | 1 + src/textures/flattexture.cpp | 1 + src/textures/imgztexture.cpp | 1 + src/textures/jpegtexture.cpp | 3 +-- src/textures/multipatchtexture.cpp | 1 + src/textures/patchtexture.cpp | 1 + src/textures/pcxtexture.cpp | 1 + src/textures/pngtexture.cpp | 3 +-- src/textures/rawpagetexture.cpp | 1 + src/textures/texture.cpp | 20 ++++++++++++-------- src/textures/textures.h | 6 ++---- src/textures/tgatexture.cpp | 1 + 
src/textures/warptexture.cpp | 1 + src/v_draw.cpp | 2 +- src/v_font.cpp | 2 ++ src/v_video.cpp | 11 ----------- 22 files changed, 33 insertions(+), 52 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index bcdf624d7..eb3fa2608 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -35,7 +35,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); bool CheckModified (); - void Unload (); void SetVial (int level); @@ -90,10 +89,6 @@ bool FHealthBar::CheckModified () return NeedRefresh; } -void FHealthBar::Unload () -{ -} - const BYTE *FHealthBar::GetColumn (unsigned int column, const Span **spans_out) { if (NeedRefresh) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index c3d11a43a..16671975a 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); protected: @@ -212,10 +211,6 @@ bool FBackdropTexture::CheckModified() return LastRenderTic != gametic; } -void FBackdropTexture::Unload() -{ -} - //============================================================================= // // diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 556323df5..5be41660e 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -87,9 +87,9 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) if (cache & FTextureManager::HIT_Columnmode) { const FTexture::Span *spanp; - /*if (r_swtruecolor) + if (r_swtruecolor) tex->GetColumnBgra(0, &spanp); - else*/ + else tex->GetColumn(0, &spanp); } else if (cache != 0) diff --git a/src/textures/automaptexture.cpp b/src/textures/automaptexture.cpp index 67d68b9fe..9aac379ef 100644 --- a/src/textures/automaptexture.cpp +++ b/src/textures/automaptexture.cpp @@ -122,6 +122,7 @@ void FAutomapTexture::Unload () delete[] 
Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/buildtexture.cpp b/src/textures/buildtexture.cpp index bfcc6333d..1155dacc4 100644 --- a/src/textures/buildtexture.cpp +++ b/src/textures/buildtexture.cpp @@ -56,7 +56,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); protected: const BYTE *Pixels; @@ -103,17 +102,6 @@ FBuildTexture::~FBuildTexture () // //========================================================================== -void FBuildTexture::Unload () -{ - // Nothing to do, since the pixels are accessed from memory-mapped files directly -} - -//========================================================================== -// -// -// -//========================================================================== - const BYTE *FBuildTexture::GetPixels () { return Pixels; diff --git a/src/textures/canvastexture.cpp b/src/textures/canvastexture.cpp index a72546d78..109d927ab 100644 --- a/src/textures/canvastexture.cpp +++ b/src/textures/canvastexture.cpp @@ -183,6 +183,8 @@ void FCanvasTexture::Unload () CanvasBgra->Destroy(); CanvasBgra = NULL; } + + FTexture::Unload(); } bool FCanvasTexture::CheckModified () diff --git a/src/textures/ddstexture.cpp b/src/textures/ddstexture.cpp index 31e748022..fb4de34c5 100644 --- a/src/textures/ddstexture.cpp +++ b/src/textures/ddstexture.cpp @@ -401,6 +401,7 @@ void FDDSTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/flattexture.cpp b/src/textures/flattexture.cpp index 840d53aaf..08e0d1221 100644 --- a/src/textures/flattexture.cpp +++ b/src/textures/flattexture.cpp @@ -138,6 +138,7 @@ void FFlatTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/imgztexture.cpp b/src/textures/imgztexture.cpp index 1c262d707..04932d4bf 100644 --- a/src/textures/imgztexture.cpp +++ b/src/textures/imgztexture.cpp @@ -142,6 +142,7 @@ void FIMGZTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index a37eff6c3..3b5359846 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -196,7 +196,6 @@ public: protected: BYTE *Pixels; - std::vector PixelsBgra; Span DummySpans[2]; void MakeTexture (); @@ -300,7 +299,7 @@ void FJPEGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/multipatchtexture.cpp b/src/textures/multipatchtexture.cpp index b0db481a8..6ae45c785 100644 --- a/src/textures/multipatchtexture.cpp +++ b/src/textures/multipatchtexture.cpp @@ -362,6 +362,7 @@ void FMultiPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/patchtexture.cpp b/src/textures/patchtexture.cpp index 423ce4deb..8388515c0 100644 --- a/src/textures/patchtexture.cpp +++ b/src/textures/patchtexture.cpp @@ -184,6 +184,7 @@ void FPatchTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/pcxtexture.cpp b/src/textures/pcxtexture.cpp index 0ec5d2933..42a13b85a 100644 --- a/src/textures/pcxtexture.cpp +++ b/src/textures/pcxtexture.cpp @@ -191,6 +191,7 @@ void FPCXTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } 
//========================================================================== diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 95f7aca75..206797a34 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -67,7 +67,6 @@ protected: FString SourceFile; BYTE *Pixels; - std::vector PixelsBgra; Span **Spans; BYTE BitDepth; @@ -382,7 +381,7 @@ void FPNGTexture::Unload () { delete[] Pixels; Pixels = NULL; - PixelsBgra.clear(); + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/rawpagetexture.cpp b/src/textures/rawpagetexture.cpp index 1402f8844..69313fd1c 100644 --- a/src/textures/rawpagetexture.cpp +++ b/src/textures/rawpagetexture.cpp @@ -206,6 +206,7 @@ void FRawPageTexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 28a3b9333..0030719cb 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -176,6 +176,11 @@ FTexture::~FTexture () KillNative(); } +void FTexture::Unload() +{ + PixelsBgra = std::vector(); +} + const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_out) { const uint32_t *pixels = GetPixelsBgra(); @@ -189,16 +194,19 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (BgraPixels.empty()) + if (PixelsBgra.empty()) { + GetColumn(0, nullptr); const BYTE *indices = GetPixels(); - BgraPixels.resize(Width * Height); + if (indices == nullptr) + return nullptr; + PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - BgraPixels[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; } } - return BgraPixels.data(); + return PixelsBgra.data(); } bool FTexture::CheckModified () @@ -642,10 
+650,6 @@ FDummyTexture::FDummyTexture () UseType = TEX_Null; } -void FDummyTexture::Unload () -{ -} - void FDummyTexture::SetSize (int width, int height) { Width = width; diff --git a/src/textures/textures.h b/src/textures/textures.h index 872c83b1a..38d1ef487 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -192,7 +192,7 @@ public: virtual FTexture *GetRedirect(bool wantwarped); virtual FTexture *GetRawTexture(); // for FMultiPatchTexture to override - virtual void Unload () = 0; + virtual void Unload (); // Returns the native pixel format for this image virtual FTextureFormat GetFormat(); @@ -269,8 +269,7 @@ protected: Rotations = other->Rotations; } -private: - std::vector BgraPixels; + std::vector PixelsBgra; public: static void FlipSquareBlock (BYTE *block, int x, int y); @@ -472,7 +471,6 @@ public: FDummyTexture (); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - void Unload (); void SetSize (int width, int height); }; diff --git a/src/textures/tgatexture.cpp b/src/textures/tgatexture.cpp index b208a51a3..5e76a63b2 100644 --- a/src/textures/tgatexture.cpp +++ b/src/textures/tgatexture.cpp @@ -181,6 +181,7 @@ void FTGATexture::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index a8a2ddb9e..b6977dd77 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -74,6 +74,7 @@ void FWarpTexture::Unload () Spans = NULL; } SourcePic->Unload (); + FTexture::Unload(); } bool FWarpTexture::CheckModified () diff --git a/src/v_draw.cpp b/src/v_draw.cpp index d03853c11..02ba591b6 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1403,7 +1403,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else 
R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(tex->GetPixels()); + R_SetSpanSource(r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); diff --git a/src/v_font.cpp b/src/v_font.cpp index 052074d11..ef9b69dd1 100644 --- a/src/v_font.cpp +++ b/src/v_font.cpp @@ -1662,6 +1662,7 @@ void FFontChar1::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== @@ -1723,6 +1724,7 @@ void FFontChar2::Unload () delete[] Pixels; Pixels = NULL; } + FTexture::Unload(); } //========================================================================== diff --git a/src/v_video.cpp b/src/v_video.cpp index e58638121..2cf04a29d 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -117,7 +117,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - void Unload(); bool CheckModified(); void SetTranslation(int num); @@ -1076,16 +1075,6 @@ void FPaletteTester::SetTranslation(int num) } } -//========================================================================== -// -// FPaletteTester :: Unload -// -//========================================================================== - -void FPaletteTester::Unload() -{ -} - //========================================================================== // // FPaletteTester :: GetColumn From 69b2fa72e86b180351a70a95243c1b7484f8cec9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:10:54 +0200 Subject: [PATCH 042/100] Moved RGBA draw stuff to its own header file --- src/r_draw.cpp | 91 +++++++------- src/r_draw.h | 239 ------------------------------------- src/r_draw_rgba.cpp | 39 +++--- src/r_draw_rgba.h | 276 +++++++++++++++++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 95 +++++++-------- src/r_main.cpp | 1 + 
src/r_swrenderer.cpp | 1 + src/r_things.cpp | 1 + 8 files changed, 393 insertions(+), 350 deletions(-) create mode 100644 src/r_draw_rgba.h diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ec7313c4f..552e5ff13 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -39,6 +39,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -2295,34 +2296,34 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA_C; - R_DrawColumn = R_DrawColumnP_RGBA_C; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA_C; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA_C; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA_C; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA_C; - R_DrawSpan = R_DrawSpanP_RGBA_C; + R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; + R_DrawColumn = R_DrawColumnP_RGBA; + R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; + R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; + R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; + R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; + R_DrawSpan = R_DrawSpanP_RGBA; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA_C; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA_C; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA_C; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA_C; + R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; + R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA_C; + R_FillAddColumn = R_FillAddColumn_RGBA; R_FillAddClampColumn = R_FillAddClampColumn_RGBA; R_FillSubClampColumn = R_FillSubClampColumn_RGBA; R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA_C; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA_C; - 
R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA_C; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA_C; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA_C; - R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA_C; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA_C; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA_C; + R_DrawAddColumn = R_DrawAddColumnP_RGBA; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; + R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; + R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; R_FillSpan = R_FillSpan_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA_C; + R_FillColumnHoriz = R_FillColumnHorizP_RGBA; R_DrawFogBoundary = R_DrawFogBoundary_RGBA; R_MapTiltedPlane = R_MapTiltedPlane_RGBA; @@ -2338,30 +2339,30 @@ void R_InitColumnDrawers () tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; - rt_copy1col = rt_copy1col_RGBA_c; - rt_copy4cols = rt_copy4cols_RGBA_c; - rt_map1col = rt_map1col_RGBA_c; - rt_map4cols = rt_map4cols_RGBA_c; - rt_shaded1col = rt_shaded1col_RGBA_c; - rt_shaded4cols = rt_shaded4cols_RGBA_c; - rt_add1col = rt_add1col_RGBA_c; - rt_add4cols = rt_add4cols_RGBA_c; - rt_addclamp1col = rt_addclamp1col_RGBA_c; - rt_addclamp4cols = rt_addclamp4cols_RGBA_c; - rt_subclamp1col = rt_subclamp1col_RGBA_c; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA_c; - rt_tlate1col = rt_tlate1col_RGBA_c; - rt_tlateadd1col = rt_tlateadd1col_RGBA_c; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA_c; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA_c; - rt_tlaterevsubclamp1col = 
rt_tlaterevsubclamp1col_RGBA_c; - rt_subclamp4cols = rt_subclamp4cols_RGBA_c; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA_c; - rt_tlate4cols = rt_tlate4cols_RGBA_c; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA_c; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA_c; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA_c; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA_c; + rt_copy1col = rt_copy1col_RGBA; + rt_copy4cols = rt_copy4cols_RGBA; + rt_map1col = rt_map1col_RGBA; + rt_map4cols = rt_map4cols_RGBA; + rt_shaded1col = rt_shaded1col_RGBA; + rt_shaded4cols = rt_shaded4cols_RGBA; + rt_add1col = rt_add1col_RGBA; + rt_add4cols = rt_add4cols_RGBA; + rt_addclamp1col = rt_addclamp1col_RGBA; + rt_addclamp4cols = rt_addclamp4cols_RGBA; + rt_subclamp1col = rt_subclamp1col_RGBA; + rt_revsubclamp1col = rt_revsubclamp1col_RGBA; + rt_tlate1col = rt_tlate1col_RGBA; + rt_tlateadd1col = rt_tlateadd1col_RGBA; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; + rt_subclamp4cols = rt_subclamp4cols_RGBA; + rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; + rt_tlate4cols = rt_tlate4cols_RGBA; + rt_tlateadd4cols = rt_tlateadd4cols_RGBA; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; diff --git a/src/r_draw.h b/src/r_draw.h index d09d0ab89..cea05e469 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -24,11 +24,6 @@ #define __R_DRAW__ #include "r_defs.h" -#include -#include -#include -#include -#include // Spectre/Invisibility. 
#define FUZZTABLE 50 @@ -175,39 +170,6 @@ void rt_map4cols_asm1 (int sx, int yl, int yh); void rt_map4cols_asm2 (int sx, int yl, int yh); void rt_add4cols_asm (int sx, int yl, int yh); void rt_addclamp4cols_asm (int sx, int yl, int yh); - -/// - -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA_c (int sx, int yl, int yh); - -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA_c (int sx, int yl, int yh); - -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh); - -void rt_map4cols_RGBA_c (int sx, int yl, int yh); -void rt_add4cols_RGBA_c (int sx, int yl, int yh); -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh); - -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh); - } extern void (*rt_copy1col)(int hx, int sx, int yl, int yh); @@ -247,10 +209,8 @@ void rt_draw4cols (int sx); // [RH] Preps the temporary horizontal buffer. 
void rt_initcols_pal (BYTE *buffer); -void rt_initcols_rgba (BYTE *buffer); void rt_span_coverage_pal(int x, int start, int stop); -void rt_span_coverage_rgba(int x, int start, int stop); extern void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); @@ -280,46 +240,6 @@ void R_DrawSpanMaskedP_C (void); #endif -void R_DrawColumnHorizP_RGBA_C (void); -void R_DrawColumnP_RGBA_C (void); -void R_DrawFuzzColumnP_RGBA_C (void); -void R_DrawTranslatedColumnP_RGBA_C (void); -void R_DrawShadedColumnP_RGBA_C (void); -void R_DrawSpanP_RGBA_C (void); -void R_DrawSpanMaskedP_RGBA_C (void); - -void R_DrawSpanTranslucentP_RGBA_C(); -void R_DrawSpanMaskedTranslucentP_RGBA_C(); -void R_DrawSpanAddClampP_RGBA_C(); -void R_DrawSpanMaskedAddClampP_RGBA_C(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA_C(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA_C(); -void R_DrawTlatedAddColumnP_RGBA_C(); -void R_DrawAddClampColumnP_RGBA_C(); -void R_DrawAddClampTranslatedColumnP_RGBA_C(); -void R_DrawSubClampColumnP_RGBA_C(); -void R_DrawSubClampTranslatedColumnP_RGBA_C(); -void R_DrawRevSubClampColumnP_RGBA_C(); -void R_DrawRevSubClampTranslatedColumnP_RGBA_C(); -void R_FillSpan_RGBA(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); -void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); - void R_DrawSpanTranslucentP_C (void); void R_DrawSpanMaskedTranslucentP_C (void); @@ -348,9 +268,6 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -void R_FillColumnHorizP_RGBA_C(void); -void R_FillSpan_RGBA_C(void); - #ifdef 
X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -443,160 +360,4 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); -// Redirect drawer commands to worker threads -void R_BeginDrawerCommands(); - -// Wait until all drawers finished executing -void R_EndDrawerCommands(); - -class DrawerCommandQueue; - -class DrawerThread -{ -public: - std::thread thread; - - // Thread line index of this thread - int core = 0; - - // Number of active threads - int num_cores = 1; - - // Range of rows processed this pass - int pass_start_y = 0; - int pass_end_y = MAXHEIGHT; - - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba; - - // Checks if a line is rendered by this thread - bool line_skipped_by_thread(int line) - { - return line < pass_start_y || line >= pass_end_y || line % num_cores != core; - } - - // The number of lines to skip to reach the first line to be rendered by this thread - int skipped_by_thread(int first_line) - { - int pass_skip = MAX(pass_start_y - first_line, 0); - int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; - return pass_skip + core_skip; - } - - // The number of lines to be rendered by this thread - int count_for_thread(int first_line, int count) - { - int lines_until_pass_end = MAX(pass_end_y - first_line, 0); - count = MIN(count, lines_until_pass_end); - int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; - return MAX(c, 0); - } - - // Calculate the dest address for the first line to be rendered by this thread - uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) - { - return dest + skipped_by_thread(first_line) * pitch; - } -}; - -class DrawerCommand -{ -protected: - int dc_dest_y; - -public: - DrawerCommand() - { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); - } - - virtual void Execute(DrawerThread *thread) = 0; -}; 
- -class DrawerCommandQueue -{ - enum { memorypool_size = 4 * 1024 * 1024 }; - char memorypool[memorypool_size]; - size_t memorypool_pos = 0; - - std::vector commands; - - std::vector threads; - - std::mutex start_mutex; - std::condition_variable start_condition; - std::vector active_commands; - bool shutdown_flag = false; - int run_id = 0; - - std::mutex end_mutex; - std::condition_variable end_condition; - size_t finished_threads = 0; - - int threaded_render = 0; - DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; - - void StartThreads(); - void StopThreads(); - void Finish(); - - static DrawerCommandQueue *Instance(); - - ~DrawerCommandQueue(); - -public: - // Allocate memory valid for the duration of a command execution - static void* AllocMemory(size_t size); - - // Queue command to be executed by drawer worker threads - template - static void QueueCommand(Types &&... args) - { - auto queue = Instance(); - if (queue->threaded_render == 0) - { - T command(std::forward(args)...); - command.Execute(&queue->single_core_thread); - } - else - { - void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; - T *command = new (ptr)T(std::forward(args)...); - queue->commands.push_back(command); - } - } - - // Redirects all drawing commands to worker threads until End is called - // Begin/End blocks can be nested. 
- static void Begin(); - - // End redirection and wait until all worker threads finished executing - static void End(); - - // Waits until all worker threads finished executing - static void WaitForWorkers(); -}; - -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; - -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; -}; - #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7e9f85117..a9dd2db32 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -37,6 +37,7 @@ #include "v_palette.h" #include "r_data/colormaps.h" #include "r_plane.h" +#include "r_draw_rgba.h" #include "gi.h" #include "stats.h" @@ -3655,7 +3656,7 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA_C() +void R_DrawColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3665,7 +3666,7 @@ void R_FillColumnP_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA_C() +void R_FillAddColumn_RGBA() { DrawerCommandQueue::QueueCommand(); } @@ -3685,88 +3686,88 @@ void R_FillRevSubClampColumn_RGBA() DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA_C() +void R_DrawFuzzColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA_C() +void R_DrawAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA_C() +void R_DrawTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA_C() +void R_DrawTlatedAddColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA_C() +void R_DrawShadedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA_C() +void 
R_DrawAddClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA_C() +void R_DrawAddClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA_C() +void R_DrawSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA_C() +void R_DrawSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA_C() +void R_DrawRevSubClampColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA_C() +void R_DrawRevSubClampTranslatedColumnP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA_C() +void R_DrawSpanP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA_C() +void R_DrawSpanMaskedP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA_C() +void R_DrawSpanTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA_C() +void R_DrawSpanMaskedTranslucentP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA_C() +void R_DrawSpanAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA_C() +void R_DrawSpanMaskedAddClampP_RGBA() { DrawerCommandQueue::QueueCommand(); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h new file mode 100644 index 000000000..5d7402634 --- /dev/null +++ b/src/r_draw_rgba.h @@ -0,0 +1,276 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. 
+// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAW_RGBA__ +#define __R_DRAW_RGBA__ + +#include "r_draw.h" +#include +#include +#include +#include +#include + +///////////////////////////////////////////////////////////////////////////// +// Drawer functions: + +void rt_initcols_rgba(BYTE *buffer); +void rt_span_coverage_rgba(int x, int start, int stop); + +void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); +void rt_copy4cols_RGBA(int sx, int yl, int yh); +void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); +void rt_shaded4cols_RGBA(int sx, int yl, int yh); +void rt_map1col_RGBA(int hx, int sx, int yl, int yh); +void rt_add1col_RGBA(int hx, int sx, int yl, int yh); +void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); +void rt_map4cols_RGBA(int sx, int yl, int yh); +void rt_add4cols_RGBA(int sx, int yl, int yh); +void rt_addclamp4cols_RGBA(int sx, int yl, int yh); +void rt_subclamp4cols_RGBA(int sx, int yl, int yh); +void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlate4cols_RGBA(int sx, int yl, int yh); +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int 
yh); +void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); + +void R_DrawColumnHorizP_RGBA(); +void R_DrawColumnP_RGBA(); +void R_DrawFuzzColumnP_RGBA(); +void R_DrawTranslatedColumnP_RGBA(); +void R_DrawShadedColumnP_RGBA(); + +void R_FillColumnP_RGBA(); +void R_FillAddColumn_RGBA(); +void R_FillAddClampColumn_RGBA(); +void R_FillSubClampColumn_RGBA(); +void R_FillRevSubClampColumn_RGBA(); +void R_DrawAddColumnP_RGBA(); +void R_DrawTlatedAddColumnP_RGBA(); +void R_DrawAddClampColumnP_RGBA(); +void R_DrawAddClampTranslatedColumnP_RGBA(); +void R_DrawSubClampColumnP_RGBA(); +void R_DrawSubClampTranslatedColumnP_RGBA(); +void R_DrawRevSubClampColumnP_RGBA(); +void R_DrawRevSubClampTranslatedColumnP_RGBA(); + +void R_DrawSpanP_RGBA(void); +void R_DrawSpanMaskedP_RGBA(void); +void R_DrawSpanTranslucentP_RGBA(); +void R_DrawSpanMaskedTranslucentP_RGBA(); +void R_DrawSpanAddClampP_RGBA(); +void R_DrawSpanMaskedAddClampP_RGBA(); +void R_FillSpan_RGBA(); + +void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); + +DWORD vlinec1_RGBA(); +void vlinec4_RGBA(); +DWORD mvlinec1_RGBA(); +void mvlinec4_RGBA(); +fixed_t tmvline1_add_RGBA(); +void tmvline4_add_RGBA(); +fixed_t tmvline1_addclamp_RGBA(); +void tmvline4_addclamp_RGBA(); +fixed_t tmvline1_subclamp_RGBA(); +void tmvline4_subclamp_RGBA(); +fixed_t tmvline1_revsubclamp_RGBA(); +void tmvline4_revsubclamp_RGBA(); + +void R_FillColumnHorizP_RGBA(); +void R_FillSpan_RGBA(); + +///////////////////////////////////////////////////////////////////////////// +// Multithreaded rendering infrastructure: + +// Redirect drawer commands to worker threads +void R_BeginDrawerCommands(); + +// Wait until all drawers finished executing +void R_EndDrawerCommands(); + +struct FSpecialColormap; +class DrawerCommandQueue; + +// Worker data for each thread executing drawer commands +class DrawerThread +{ +public: + std::thread thread; + + // Thread line index of this thread + int core = 0; + + // Number of active 
threads + int num_cores = 1; + + // Range of rows processed this pass + int pass_start_y = 0; + int pass_end_y = MAXHEIGHT; + + uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; + uint32_t *dc_temp_rgba; + + // Checks if a line is rendered by this thread + bool line_skipped_by_thread(int line) + { + return line < pass_start_y || line >= pass_end_y || line % num_cores != core; + } + + // The number of lines to skip to reach the first line to be rendered by this thread + int skipped_by_thread(int first_line) + { + int pass_skip = MAX(pass_start_y - first_line, 0); + int core_skip = (num_cores - (first_line + pass_skip - core) % num_cores) % num_cores; + return pass_skip + core_skip; + } + + // The number of lines to be rendered by this thread + int count_for_thread(int first_line, int count) + { + int lines_until_pass_end = MAX(pass_end_y - first_line, 0); + count = MIN(count, lines_until_pass_end); + int c = (count - skipped_by_thread(first_line) + num_cores - 1) / num_cores; + return MAX(c, 0); + } + + // Calculate the dest address for the first line to be rendered by this thread + uint32_t *dest_for_thread(int first_line, int pitch, uint32_t *dest) + { + return dest + skipped_by_thread(first_line) * pitch; + } +}; + +// Task to be executed by each worker thread +class DrawerCommand +{ +protected: + int dc_dest_y; + +public: + DrawerCommand() + { + dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + } + + virtual void Execute(DrawerThread *thread) = 0; +}; + +// Manages queueing up commands and executing them on worker threads +class DrawerCommandQueue +{ + enum { memorypool_size = 4 * 1024 * 1024 }; + char memorypool[memorypool_size]; + size_t memorypool_pos = 0; + + std::vector commands; + + std::vector threads; + + std::mutex start_mutex; + std::condition_variable start_condition; + std::vector active_commands; + bool shutdown_flag = false; + int run_id = 0; + + std::mutex end_mutex; + std::condition_variable end_condition; + size_t finished_threads 
= 0; + + int threaded_render = 0; + DrawerThread single_core_thread; + int num_passes = 2; + int rows_in_pass = 540; + + void StartThreads(); + void StopThreads(); + void Finish(); + + static DrawerCommandQueue *Instance(); + + ~DrawerCommandQueue(); + +public: + // Allocate memory valid for the duration of a command execution + static void* AllocMemory(size_t size); + + // Queue command to be executed by drawer worker threads + template + static void QueueCommand(Types &&... args) + { + auto queue = Instance(); + if (queue->threaded_render == 0) + { + T command(std::forward(args)...); + command.Execute(&queue->single_core_thread); + } + else + { + void *ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + T *command = new (ptr)T(std::forward(args)...); + queue->commands.push_back(command); + } + } + + // Redirects all drawing commands to worker threads until End is called + // Begin/End blocks can be nested. + static void Begin(); + + // End redirection and wait until all worker threads finished executing + static void End(); + + // Waits until all worker threads finished executing + static void WaitForWorkers(); +}; + +///////////////////////////////////////////////////////////////////////////// +// Drawer commands: + +class ApplySpecialColormapRGBACommand : public DrawerCommand +{ + BYTE *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + +public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; +}; + +#endif diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index cd124ac63..32d5080c5 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -42,6 +42,7 @@ #include "r_main.h" #include "r_things.h" #include "v_video.h" +#include "r_draw_rgba.h" #ifndef NO_SSE #include #endif @@ -1628,171 +1629,171 @@ public: 
///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA_c (int sx, int yl, int yh) +void rt_copy4cols_RGBA (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA_c(0, sx, yl, yh); - rt_copy1col_RGBA_c(1, sx + 1, yl, yh); - rt_copy1col_RGBA_c(2, sx + 2, yl, yh); - rt_copy1col_RGBA_c(3, sx + 3, yl, yh); + rt_copy1col_RGBA(0, sx, yl, yh); + rt_copy1col_RGBA(1, sx + 1, yl, yh); + rt_copy1col_RGBA(2, sx + 2, yl, yh); + rt_copy1col_RGBA(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_map1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. -void rt_map4cols_RGBA_c (int sx, int yl, int yh) +void rt_map4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA_c(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA_c(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. 
-void rt_tlate1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlate4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_add1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA_c (int sx, int yl, int yh) +void rt_add4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA_c(int sx, int yl, int yh) +void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. 
-void rt_shaded4cols_RGBA_c (int sx, int yl, int yh) +void rt_shaded4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_addclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_addclamp1col_RGBA(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_subclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. 
-void rt_tlatesubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_subclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_subclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_subclamp4cols_RGBA(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA_c (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA_c(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA_c(hx, sx, yl, yh); + rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_revsubclamp1col_RGBA(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. 
-void rt_tlaterevsubclamp4cols_RGBA_c (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) { - rt_Translate4cols_RGBA_c(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA_c(sx, yl, yh); + rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_revsubclamp4cols_RGBA(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1815,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. -void R_DrawColumnHorizP_RGBA_C (void) +void R_DrawColumnHorizP_RGBA (void) { if (dc_count <= 0) return; @@ -1830,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA_C (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA_C (void) +void R_FillColumnHorizP_RGBA (void) { if (dc_count <= 0) return; diff --git a/src/r_main.cpp b/src/r_main.cpp index 4e5ff1dbd..247a98125 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -40,6 +40,7 @@ #include "r_segs.h" #include "r_3dfloors.h" #include "r_sky.h" +#include "r_draw_rgba.h" #include "st_stuff.h" #include "c_cvars.h" #include "c_dispatch.h" diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 5be41660e..fbbd65b17 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -42,6 +42,7 @@ #include "r_3dfloors.h" #include "textures/textures.h" #include "r_data/voxels.h" +#include "r_draw_rgba.h" EXTERN_CVAR(Bool, r_shadercolormaps) diff --git a/src/r_things.cpp b/src/r_things.cpp index 2dc0bdb6c..f1f29f160 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,6 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw_rgba.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" From 586d5cdf1eb5609fcd480aa0d69fc764c4fc0103 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 13 Jun 2016 23:33:52 +0200 Subject: [PATCH 043/100] Normalize naming 
convention a little --- src/r_draw.cpp | 136 +++++++++++++++++++++---------------------- src/r_draw_rgba.cpp | 80 ++++++++++++------------- src/r_draw_rgba.h | 128 ++++++++++++++++++++-------------------- src/r_drawt_rgba.cpp | 94 +++++++++++++++--------------- src/r_plane.cpp | 4 +- src/r_plane.h | 4 +- src/r_things.cpp | 2 +- src/r_things.h | 2 +- 8 files changed, 225 insertions(+), 225 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 552e5ff13..ecb4441f8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -2296,82 +2296,82 @@ void R_InitColumnDrawers () domvline4_saved = domvline4; } - R_DrawColumnHoriz = R_DrawColumnHorizP_RGBA; - R_DrawColumn = R_DrawColumnP_RGBA; - R_DrawFuzzColumn = R_DrawFuzzColumnP_RGBA; - R_DrawTranslatedColumn = R_DrawTranslatedColumnP_RGBA; - R_DrawShadedColumn = R_DrawShadedColumnP_RGBA; - R_DrawSpanMasked = R_DrawSpanMaskedP_RGBA; - R_DrawSpan = R_DrawSpanP_RGBA; + R_DrawColumnHoriz = R_DrawColumnHoriz_rgba; + R_DrawColumn = R_DrawColumn_rgba; + R_DrawFuzzColumn = R_DrawFuzzColumn_rgba; + R_DrawTranslatedColumn = R_DrawTranslatedColumn_rgba; + R_DrawShadedColumn = R_DrawShadedColumn_rgba; + R_DrawSpanMasked = R_DrawSpanMasked_rgba; + R_DrawSpan = R_DrawSpan_rgba; - R_DrawSpanTranslucent = R_DrawSpanTranslucentP_RGBA; - R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucentP_RGBA; - R_DrawSpanAddClamp = R_DrawSpanAddClampP_RGBA; - R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClampP_RGBA; - R_FillColumn = R_FillColumnP_RGBA; - R_FillAddColumn = R_FillAddColumn_RGBA; - R_FillAddClampColumn = R_FillAddClampColumn_RGBA; - R_FillSubClampColumn = R_FillSubClampColumn_RGBA; - R_FillRevSubClampColumn = R_FillRevSubClampColumn_RGBA; - R_DrawAddColumn = R_DrawAddColumnP_RGBA; - R_DrawTlatedAddColumn = R_DrawTlatedAddColumnP_RGBA; - R_DrawAddClampColumn = R_DrawAddClampColumnP_RGBA; - R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumnP_RGBA; - R_DrawSubClampColumn = R_DrawSubClampColumnP_RGBA; - 
R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumnP_RGBA; - R_DrawRevSubClampColumn = R_DrawRevSubClampColumnP_RGBA; - R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumnP_RGBA; - R_FillSpan = R_FillSpan_RGBA; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_FillColumnHoriz = R_FillColumnHorizP_RGBA; + R_DrawSpanTranslucent = R_DrawSpanTranslucent_rgba; + R_DrawSpanMaskedTranslucent = R_DrawSpanMaskedTranslucent_rgba; + R_DrawSpanAddClamp = R_DrawSpanAddClamp_rgba; + R_DrawSpanMaskedAddClamp = R_DrawSpanMaskedAddClamp_rgba; + R_FillColumn = R_FillColumn_rgba; + R_FillAddColumn = R_FillAddColumn_rgba; + R_FillAddClampColumn = R_FillAddClampColumn_rgba; + R_FillSubClampColumn = R_FillSubClampColumn_rgba; + R_FillRevSubClampColumn = R_FillRevSubClampColumn_rgba; + R_DrawAddColumn = R_DrawAddColumn_rgba; + R_DrawTlatedAddColumn = R_DrawTlatedAddColumn_rgba; + R_DrawAddClampColumn = R_DrawAddClampColumn_rgba; + R_DrawAddClampTranslatedColumn = R_DrawAddClampTranslatedColumn_rgba; + R_DrawSubClampColumn = R_DrawSubClampColumn_rgba; + R_DrawSubClampTranslatedColumn = R_DrawSubClampTranslatedColumn_rgba; + R_DrawRevSubClampColumn = R_DrawRevSubClampColumn_rgba; + R_DrawRevSubClampTranslatedColumn = R_DrawRevSubClampTranslatedColumn_rgba; + R_FillSpan = R_FillSpan_rgba; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_FillColumnHoriz = R_FillColumnHoriz_rgba; - R_DrawFogBoundary = R_DrawFogBoundary_RGBA; - R_MapTiltedPlane = R_MapTiltedPlane_RGBA; - R_MapColoredPlane = R_MapColoredPlane_RGBA; - R_DrawParticle = R_DrawParticle_RGBA; + R_DrawFogBoundary = R_DrawFogBoundary_rgba; + R_MapTiltedPlane = R_MapTiltedPlane_rgba; + R_MapColoredPlane = R_MapColoredPlane_rgba; + R_DrawParticle = R_DrawParticle_rgba; - tmvline1_add = tmvline1_add_RGBA; - tmvline4_add = tmvline4_add_RGBA; - tmvline1_addclamp = tmvline1_addclamp_RGBA; - tmvline4_addclamp = tmvline4_addclamp_RGBA; - tmvline1_subclamp = tmvline1_subclamp_RGBA; - tmvline4_subclamp = 
tmvline4_subclamp_RGBA; - tmvline1_revsubclamp = tmvline1_revsubclamp_RGBA; - tmvline4_revsubclamp = tmvline4_revsubclamp_RGBA; + tmvline1_add = tmvline1_add_rgba; + tmvline4_add = tmvline4_add_rgba; + tmvline1_addclamp = tmvline1_addclamp_rgba; + tmvline4_addclamp = tmvline4_addclamp_rgba; + tmvline1_subclamp = tmvline1_subclamp_rgba; + tmvline4_subclamp = tmvline4_subclamp_rgba; + tmvline1_revsubclamp = tmvline1_revsubclamp_rgba; + tmvline4_revsubclamp = tmvline4_revsubclamp_rgba; - rt_copy1col = rt_copy1col_RGBA; - rt_copy4cols = rt_copy4cols_RGBA; - rt_map1col = rt_map1col_RGBA; - rt_map4cols = rt_map4cols_RGBA; - rt_shaded1col = rt_shaded1col_RGBA; - rt_shaded4cols = rt_shaded4cols_RGBA; - rt_add1col = rt_add1col_RGBA; - rt_add4cols = rt_add4cols_RGBA; - rt_addclamp1col = rt_addclamp1col_RGBA; - rt_addclamp4cols = rt_addclamp4cols_RGBA; - rt_subclamp1col = rt_subclamp1col_RGBA; - rt_revsubclamp1col = rt_revsubclamp1col_RGBA; - rt_tlate1col = rt_tlate1col_RGBA; - rt_tlateadd1col = rt_tlateadd1col_RGBA; - rt_tlateaddclamp1col = rt_tlateaddclamp1col_RGBA; - rt_tlatesubclamp1col = rt_tlatesubclamp1col_RGBA; - rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_RGBA; - rt_subclamp4cols = rt_subclamp4cols_RGBA; - rt_revsubclamp4cols = rt_revsubclamp4cols_RGBA; - rt_tlate4cols = rt_tlate4cols_RGBA; - rt_tlateadd4cols = rt_tlateadd4cols_RGBA; - rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_RGBA; - rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_RGBA; - rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_RGBA; + rt_copy1col = rt_copy1col_rgba; + rt_copy4cols = rt_copy4cols_rgba; + rt_map1col = rt_map1col_rgba; + rt_map4cols = rt_map4cols_rgba; + rt_shaded1col = rt_shaded1col_rgba; + rt_shaded4cols = rt_shaded4cols_rgba; + rt_add1col = rt_add1col_rgba; + rt_add4cols = rt_add4cols_rgba; + rt_addclamp1col = rt_addclamp1col_rgba; + rt_addclamp4cols = rt_addclamp4cols_rgba; + rt_subclamp1col = rt_subclamp1col_rgba; + rt_revsubclamp1col = rt_revsubclamp1col_rgba; + rt_tlate1col 
= rt_tlate1col_rgba; + rt_tlateadd1col = rt_tlateadd1col_rgba; + rt_tlateaddclamp1col = rt_tlateaddclamp1col_rgba; + rt_tlatesubclamp1col = rt_tlatesubclamp1col_rgba; + rt_tlaterevsubclamp1col = rt_tlaterevsubclamp1col_rgba; + rt_subclamp4cols = rt_subclamp4cols_rgba; + rt_revsubclamp4cols = rt_revsubclamp4cols_rgba; + rt_tlate4cols = rt_tlate4cols_rgba; + rt_tlateadd4cols = rt_tlateadd4cols_rgba; + rt_tlateaddclamp4cols = rt_tlateaddclamp4cols_rgba; + rt_tlatesubclamp4cols = rt_tlatesubclamp4cols_rgba; + rt_tlaterevsubclamp4cols = rt_tlaterevsubclamp4cols_rgba; rt_initcols = rt_initcols_rgba; rt_span_coverage = rt_span_coverage_rgba; - dovline1 = vlinec1_RGBA; - doprevline1 = vlinec1_RGBA; - domvline1 = mvlinec1_RGBA; + dovline1 = vlinec1_rgba; + doprevline1 = vlinec1_rgba; + domvline1 = mvlinec1_rgba; - dovline4 = vlinec4_RGBA; - domvline4 = mvlinec4_RGBA; + dovline4 = vlinec4_rgba; + domvline4 = mvlinec4_rgba; } else { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a9dd2db32..2062609b4 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3656,130 +3656,130 @@ void R_EndDrawerCommands() DrawerCommandQueue::End(); } -void R_DrawColumnP_RGBA() +void R_DrawColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillColumnP_RGBA() +void R_FillColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddColumn_RGBA() +void R_FillAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillAddClampColumn_RGBA() +void R_FillAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSubClampColumn_RGBA() +void R_FillSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillRevSubClampColumn_RGBA() +void R_FillRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawFuzzColumnP_RGBA() +void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; } -void R_DrawAddColumnP_RGBA() +void R_DrawAddColumn_rgba() { 
DrawerCommandQueue::QueueCommand(); } -void R_DrawTranslatedColumnP_RGBA() +void R_DrawTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawTlatedAddColumnP_RGBA() +void R_DrawTlatedAddColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawShadedColumnP_RGBA() +void R_DrawShadedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampColumnP_RGBA() +void R_DrawAddClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawAddClampTranslatedColumnP_RGBA() +void R_DrawAddClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampColumnP_RGBA() +void R_DrawSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSubClampTranslatedColumnP_RGBA() +void R_DrawSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampColumnP_RGBA() +void R_DrawRevSubClampColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawRevSubClampTranslatedColumnP_RGBA() +void R_DrawRevSubClampTranslatedColumn_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanP_RGBA() +void R_DrawSpan_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedP_RGBA() +void R_DrawSpanMasked_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanTranslucentP_RGBA() +void R_DrawSpanTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedTranslucentP_RGBA() +void R_DrawSpanMaskedTranslucent_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanAddClampP_RGBA() +void R_DrawSpanAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_DrawSpanMaskedAddClampP_RGBA() +void R_DrawSpanMaskedAddClamp_rgba() { DrawerCommandQueue::QueueCommand(); } -void R_FillSpan_RGBA() +void R_FillSpan_rgba() { DrawerCommandQueue::QueueCommand(); } //extern FTexture *rw_pic; // For the asserts below -DWORD vlinec1_RGBA() +DWORD vlinec1_rgba() { /*DWORD fracstep = dc_iscale; DWORD frac = dc_texturefrac; @@ -3792,79 
+3792,79 @@ DWORD vlinec1_RGBA() return dc_texturefrac + dc_count * dc_iscale; } -void vlinec4_RGBA() +void vlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -DWORD mvlinec1_RGBA() +DWORD mvlinec1_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void mvlinec4_RGBA() +void mvlinec4_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_add_RGBA() +fixed_t tmvline1_add_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_add_RGBA() +void tmvline4_add_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_addclamp_RGBA() +fixed_t tmvline1_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_addclamp_RGBA() +void tmvline4_addclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_subclamp_RGBA() +fixed_t tmvline1_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_subclamp_RGBA() +void tmvline4_subclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -fixed_t tmvline1_revsubclamp_RGBA() +fixed_t tmvline1_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } -void tmvline4_revsubclamp_RGBA() +void tmvline4_revsubclamp_rgba() { DrawerCommandQueue::QueueCommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } -void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) +void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) { for (; y < y2; ++y) { @@ -3873,7 +3873,7 @@ void R_DrawFogBoundarySection_RGBA(int y, int y2, int x1) } } -void 
R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) { // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis @@ -3913,7 +3913,7 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) if (t2 < b2 && rcolormap != 0) { // Colormap 0 is always the identity map, so rendering it is // just a waste of time. - R_DrawFogBoundarySection_RGBA(t2, b2, xr); + R_DrawFogBoundarySection_rgba(t2, b2, xr); } if (t1 < t2) t2 = t1; if (b1 > b2) b2 = b1; @@ -3965,6 +3965,6 @@ void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip) } if (t2 < b2 && rcolormap != 0) { - R_DrawFogBoundarySection_RGBA(t2, b2, x1); + R_DrawFogBoundarySection_rgba(t2, b2, x1); } } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 5d7402634..9f07ff0bf 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -36,76 +36,76 @@ void rt_initcols_rgba(BYTE *buffer); void rt_span_coverage_rgba(int x, int start, int stop); -void rt_copy1col_RGBA(int hx, int sx, int yl, int yh); -void rt_copy4cols_RGBA(int sx, int yl, int yh); -void rt_shaded1col_RGBA(int hx, int sx, int yl, int yh); -void rt_shaded4cols_RGBA(int sx, int yl, int yh); -void rt_map1col_RGBA(int hx, int sx, int yl, int yh); -void rt_add1col_RGBA(int hx, int sx, int yl, int yh); -void rt_addclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_subclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_revsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlate1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateadd1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlateaddclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlatesubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_tlaterevsubclamp1col_RGBA(int hx, int sx, int yl, int yh); -void rt_map4cols_RGBA(int sx, int yl, int yh); -void rt_add4cols_RGBA(int sx, int yl, int 
yh); -void rt_addclamp4cols_RGBA(int sx, int yl, int yh); -void rt_subclamp4cols_RGBA(int sx, int yl, int yh); -void rt_revsubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlate4cols_RGBA(int sx, int yl, int yh); -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh); -void rt_tlateaddclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlatesubclamp4cols_RGBA(int sx, int yl, int yh); -void rt_tlaterevsubclamp4cols_RGBA(int sx, int yl, int yh); +void rt_copy1col_rgba(int hx, int sx, int yl, int yh); +void rt_copy4cols_rgba(int sx, int yl, int yh); +void rt_shaded1col_rgba(int hx, int sx, int yl, int yh); +void rt_shaded4cols_rgba(int sx, int yl, int yh); +void rt_map1col_rgba(int hx, int sx, int yl, int yh); +void rt_add1col_rgba(int hx, int sx, int yl, int yh); +void rt_addclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_subclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_revsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlate1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateadd1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlateaddclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlatesubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_tlaterevsubclamp1col_rgba(int hx, int sx, int yl, int yh); +void rt_map4cols_rgba(int sx, int yl, int yh); +void rt_add4cols_rgba(int sx, int yl, int yh); +void rt_addclamp4cols_rgba(int sx, int yl, int yh); +void rt_subclamp4cols_rgba(int sx, int yl, int yh); +void rt_revsubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlate4cols_rgba(int sx, int yl, int yh); +void rt_tlateadd4cols_rgba(int sx, int yl, int yh); +void rt_tlateaddclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlatesubclamp4cols_rgba(int sx, int yl, int yh); +void rt_tlaterevsubclamp4cols_rgba(int sx, int yl, int yh); -void R_DrawColumnHorizP_RGBA(); -void R_DrawColumnP_RGBA(); -void R_DrawFuzzColumnP_RGBA(); -void R_DrawTranslatedColumnP_RGBA(); -void R_DrawShadedColumnP_RGBA(); +void 
R_DrawColumnHoriz_rgba(); +void R_DrawColumn_rgba(); +void R_DrawFuzzColumn_rgba(); +void R_DrawTranslatedColumn_rgba(); +void R_DrawShadedColumn_rgba(); -void R_FillColumnP_RGBA(); -void R_FillAddColumn_RGBA(); -void R_FillAddClampColumn_RGBA(); -void R_FillSubClampColumn_RGBA(); -void R_FillRevSubClampColumn_RGBA(); -void R_DrawAddColumnP_RGBA(); -void R_DrawTlatedAddColumnP_RGBA(); -void R_DrawAddClampColumnP_RGBA(); -void R_DrawAddClampTranslatedColumnP_RGBA(); -void R_DrawSubClampColumnP_RGBA(); -void R_DrawSubClampTranslatedColumnP_RGBA(); -void R_DrawRevSubClampColumnP_RGBA(); -void R_DrawRevSubClampTranslatedColumnP_RGBA(); +void R_FillColumn_rgba(); +void R_FillAddColumn_rgba(); +void R_FillAddClampColumn_rgba(); +void R_FillSubClampColumn_rgba(); +void R_FillRevSubClampColumn_rgba(); +void R_DrawAddColumn_rgba(); +void R_DrawTlatedAddColumn_rgba(); +void R_DrawAddClampColumn_rgba(); +void R_DrawAddClampTranslatedColumn_rgba(); +void R_DrawSubClampColumn_rgba(); +void R_DrawSubClampTranslatedColumn_rgba(); +void R_DrawRevSubClampColumn_rgba(); +void R_DrawRevSubClampTranslatedColumn_rgba(); -void R_DrawSpanP_RGBA(void); -void R_DrawSpanMaskedP_RGBA(void); -void R_DrawSpanTranslucentP_RGBA(); -void R_DrawSpanMaskedTranslucentP_RGBA(); -void R_DrawSpanAddClampP_RGBA(); -void R_DrawSpanMaskedAddClampP_RGBA(); -void R_FillSpan_RGBA(); +void R_DrawSpan_rgba(void); +void R_DrawSpanMasked_rgba(void); +void R_DrawSpanTranslucent_rgba(); +void R_DrawSpanMaskedTranslucent_rgba(); +void R_DrawSpanAddClamp_rgba(); +void R_DrawSpanMaskedAddClamp_rgba(); +void R_FillSpan_rgba(); -void R_DrawFogBoundary_RGBA(int x1, int x2, short *uclip, short *dclip); +void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); -DWORD vlinec1_RGBA(); -void vlinec4_RGBA(); -DWORD mvlinec1_RGBA(); -void mvlinec4_RGBA(); -fixed_t tmvline1_add_RGBA(); -void tmvline4_add_RGBA(); -fixed_t tmvline1_addclamp_RGBA(); -void tmvline4_addclamp_RGBA(); -fixed_t tmvline1_subclamp_RGBA(); 
-void tmvline4_subclamp_RGBA(); -fixed_t tmvline1_revsubclamp_RGBA(); -void tmvline4_revsubclamp_RGBA(); +DWORD vlinec1_rgba(); +void vlinec4_rgba(); +DWORD mvlinec1_rgba(); +void mvlinec4_rgba(); +fixed_t tmvline1_add_rgba(); +void tmvline4_add_rgba(); +fixed_t tmvline1_addclamp_rgba(); +void tmvline4_addclamp_rgba(); +fixed_t tmvline1_subclamp_rgba(); +void tmvline4_subclamp_rgba(); +fixed_t tmvline1_revsubclamp_rgba(); +void tmvline4_revsubclamp_rgba(); -void R_FillColumnHorizP_RGBA(); -void R_FillSpan_RGBA(); +void R_FillColumnHoriz_rgba(); +void R_FillSpan_rgba(); ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 32d5080c5..8f6d2ca13 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1629,171 +1629,171 @@ public: ///////////////////////////////////////////////////////////////////////////// // Copies one span at hx to the screen at sx. -void rt_copy1col_RGBA (int hx, int sx, int yl, int yh) +void rt_copy1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. -void rt_copy4cols_RGBA (int sx, int yl, int yh) +void rt_copy4cols_rgba (int sx, int yl, int yh) { // To do: we could do this with SSE using __m128i - rt_copy1col_RGBA(0, sx, yl, yh); - rt_copy1col_RGBA(1, sx + 1, yl, yh); - rt_copy1col_RGBA(2, sx + 2, yl, yh); - rt_copy1col_RGBA(3, sx + 3, yl, yh); + rt_copy1col_rgba(0, sx, yl, yh); + rt_copy1col_rgba(1, sx + 1, yl, yh); + rt_copy1col_rgba(2, sx + 2, yl, yh); + rt_copy1col_rgba(3, sx + 3, yl, yh); } // Maps one span at hx to the screen at sx. -void rt_map1col_RGBA (int hx, int sx, int yl, int yh) +void rt_map1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. 
-void rt_map4cols_RGBA (int sx, int yl, int yh) +void rt_map4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } -void rt_Translate1col_RGBA(const BYTE *translation, int hx, int yl, int yh) +void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); } -void rt_Translate4cols_RGBA(const BYTE *translation, int yl, int yh) +void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh) { DrawerCommandQueue::QueueCommand(translation, yl, yh); } // Translates one span at hx to the screen at sx. -void rt_tlate1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_map1col(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. -void rt_tlate4cols_RGBA (int sx, int yl, int yh) +void rt_tlate4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_map4cols(sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. -void rt_add1col_RGBA (int hx, int sx, int yl, int yh) +void rt_add1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_RGBA (int sx, int yl, int yh) +void rt_add4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. 
-void rt_tlateadd1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); rt_add1col(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_RGBA(int sx, int yl, int yh) +void rt_tlateadd4cols_rgba(int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_add4cols(sx, yl, yh); } // Shades one span at hx to the screen at sx. -void rt_shaded1col_RGBA (int hx, int sx, int yl, int yh) +void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. -void rt_shaded4cols_RGBA (int sx, int yl, int yh) +void rt_shaded4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_RGBA (int sx, int yl, int yh) +void rt_addclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_addclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_addclamp1col_rgba(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. 
-void rt_tlateaddclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); rt_addclamp4cols(sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. -void rt_subclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_RGBA (int sx, int yl, int yh) +void rt_subclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_subclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_subclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_subclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_subclamp4cols_rgba(sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. 
-void rt_revsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { DrawerCommandQueue::QueueCommand(sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_RGBA (int hx, int sx, int yl, int yh) +void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_RGBA(dc_translation, hx, yl, yh); - rt_revsubclamp1col_RGBA(hx, sx, yl, yh); + rt_Translate1col_rgba(dc_translation, hx, yl, yh); + rt_revsubclamp1col_rgba(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_RGBA (int sx, int yl, int yh) +void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_RGBA(dc_translation, yl, yh); - rt_revsubclamp4cols_RGBA(sx, yl, yh); + rt_Translate4cols_rgba(dc_translation, yl, yh); + rt_revsubclamp4cols_rgba(sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -1816,7 +1816,7 @@ void rt_span_coverage_rgba(int x, int start, int stop) // Stretches a column into a temporary buffer which is later // drawn to the screen along with up to three other columns. 
-void R_DrawColumnHorizP_RGBA (void) +void R_DrawColumnHoriz_rgba (void) { if (dc_count <= 0) return; @@ -1831,7 +1831,7 @@ void R_DrawColumnHorizP_RGBA (void) } // [RH] Just fills a column with a given color -void R_FillColumnHorizP_RGBA (void) +void R_FillColumnHoriz_rgba (void) { if (dc_count <= 0) return; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1cde16071..1a08d1793 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -477,7 +477,7 @@ void R_MapTiltedPlane_C (int y, int x1) #endif } -void R_MapTiltedPlane_RGBA (int y, int x1) +void R_MapTiltedPlane_rgba (int y, int x1) { int x2 = spanend[y]; @@ -509,7 +509,7 @@ void R_MapColoredPlane_C (int y, int x1) memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); } -void R_MapColoredPlane_RGBA(int y, int x1) +void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); diff --git a/src/r_plane.h b/src/r_plane.h index 7505ac995..b199d3477 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -97,9 +97,9 @@ extern void(*R_MapColoredPlane)(int y, int x1); extern void(*R_MapTiltedPlane)(int y, int x1); void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_RGBA(int y, int x); +void R_MapTiltedPlane_rgba(int y, int x); void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_RGBA(int y, int x1); +void R_MapColoredPlane_rgba(int y, int x1); visplane_t *R_FindPlane ( const secplane_t &height, diff --git a/src/r_things.cpp b/src/r_things.cpp index f1f29f160..0858dce2f 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2676,7 +2676,7 @@ void R_DrawParticle_C (vissprite_t *vis) } } -void R_DrawParticle_RGBA(vissprite_t *vis) +void R_DrawParticle_rgba(vissprite_t *vis) { int spacing; uint32_t *dest; diff --git a/src/r_things.h b/src/r_things.h index 785729b09..f5cd30e00 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -98,7 +98,7 @@ struct particle_t; extern void(*R_DrawParticle)(vissprite_t *); void 
R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_RGBA (vissprite_t *); +void R_DrawParticle_rgba (vissprite_t *); void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); From 9c8c1e0ea51534d8c6d5fba8ed0c58a965aa88e8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 00:27:08 +0200 Subject: [PATCH 044/100] Fixed window transparency bug --- src/r_drawt_rgba.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 8f6d2ca13..d2d715c8d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -588,6 +588,8 @@ class RtAdd4colsRGBACommand : public DrawerCommand fixed_t dc_light; ShadeConstants dc_shade_constants; BYTE *dc_colormap; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -601,6 +603,8 @@ public: dc_light = ::dc_light; dc_shade_constants = ::dc_shade_constants; dc_colormap = ::dc_colormap; + dc_srcalpha = ::dc_srcalpha; + dc_destalpha = ::dc_destalpha; } #ifdef NO_SSE @@ -722,10 +726,10 @@ public: __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; // shade_pal_index: __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); From 77c4786b9d716ab018ec4b082490b6ed78f5cc36 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 14 Jun 2016 23:05:20 +0200 Subject: [PATCH 045/100] Minor code cleanup --- src/f_wipe.cpp | 5 +- src/r_draw.cpp | 57 +++-------- src/r_draw.h | 19 +++- src/r_draw_rgba.h | 213 +++++++++++++++++++++++++++++++++++++++++ src/r_drawt.cpp | 32 +++---- src/r_main.h | 223 ------------------------------------------- 
src/r_plane.cpp | 28 +++--- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 5 +- src/v_draw.cpp | 1 + 10 files changed, 277 insertions(+), 308 deletions(-) diff --git a/src/f_wipe.cpp b/src/f_wipe.cpp index 84b6036e4..aa9038eeb 100644 --- a/src/f_wipe.cpp +++ b/src/f_wipe.cpp @@ -78,7 +78,7 @@ bool wipe_initMelt (int ticks) int i, r; // copy start screen to main screen - screen->DrawBlock(0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); + screen->DrawBlock (0, 0, SCREENWIDTH, SCREENHEIGHT, (BYTE *)wipe_scr_start); // makes this wipe faster (in theory) // to have stuff in column-major format @@ -271,8 +271,7 @@ bool wipe_doBurn (int ticks) // Draw the screen int xstep, ystep, firex, firey; int x, y; - BYTE *to; - BYTE *fromold, *fromnew; + BYTE *to, *fromold, *fromnew; const int SHIFT = 16; xstep = (FIREWIDTH << SHIFT) / SCREENWIDTH; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index ecb4441f8..4dcdc3e6b 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -63,7 +63,7 @@ extern int ST_Y; BYTE* viewimage; extern "C" { int ylookup[MAXHEIGHT]; -BYTE* dc_destorg; +BYTE *dc_destorg; } int scaledviewwidth; @@ -276,7 +276,7 @@ void R_DrawColumnP_C (void) { // Re-map color indices from wall texture column // using a lighting/special effects LUT. 
- *dest = colormap[source[frac >> FRACBITS]]; + *dest = colormap[source[frac>>FRACBITS]]; dest += pitch; frac += fracstep; @@ -321,13 +321,12 @@ void R_FillAddColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -348,13 +347,12 @@ void R_FillAddClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -381,13 +379,12 @@ void R_FillSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor | 0x40100400; + int pitch = dc_pitch; do { @@ -413,13 +410,12 @@ void R_FillRevSubClampColumn_C (void) return; dest = dc_dest; - int pitch = dc_pitch; - DWORD *bg2rgb; DWORD fg; bg2rgb = dc_destblend; fg = dc_srccolor; + int pitch = dc_pitch; do { @@ -672,13 +668,14 @@ void R_DrawTranslatedColumnP_C (void) { *dest = colormap[translation[source[frac>>FRACBITS]]]; dest += pitch; + frac += fracstep; } while (--count); } } // Draw a column that is both translated and translucent -void R_DrawTlatedAddColumnP_C() +void R_DrawTlatedAddColumnP_C (void) { int count; BYTE *dest; @@ -772,15 +769,15 @@ void R_DrawAddClampColumnP_C () frac = dc_texturefrac; { - const BYTE *source = dc_source; BYTE *colormap = dc_colormap; + const BYTE *source = dc_source; int pitch = dc_pitch; DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; do { - DWORD a = fg2rgb[colormap[source[frac >> FRACBITS]]] + bg2rgb[*dest]; + DWORD a = fg2rgb[colormap[source[frac>>FRACBITS]]] + bg2rgb[*dest]; DWORD b = a; a |= 0x01f07c1f; @@ -788,7 +785,7 @@ void R_DrawAddClampColumnP_C () a &= 0x3fffffff; b = b - (b >> 5); a |= b; - *dest = RGB32k.All[a & (a >> 15)]; + *dest = RGB32k.All[a & (a>>15)]; dest += pitch; frac += fracstep; } while (--count); @@ -1190,9 +1187,6 @@ void R_DrawSpanP_C (void) } while (--count); } } 
-#endif - -#ifndef X86_ASM // [RH] Draw a span with holes void R_DrawSpanMaskedP_C (void) @@ -1282,8 +1276,6 @@ void R_DrawSpanTranslucentP_C (void) xstep = ds_xstep; ystep = ds_ystep; - uint32_t light = calc_light_multiplier(ds_light); - if (ds_xbits == 6 && ds_ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1334,8 +1326,6 @@ void R_DrawSpanMaskedTranslucentP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1426,7 +1416,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac>>(32-6-6))&(63*64)) + (yfrac>>(32-6)); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1436,7 +1425,6 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); @@ -1449,7 +1437,6 @@ void R_DrawSpanAddClampP_C (void) do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - DWORD a = fg2rgb[colormap[source[spot]]] + bg2rgb[*dest]; DWORD b = a; @@ -1459,14 +1446,12 @@ void R_DrawSpanAddClampP_C (void) b = b - (b >> 5); a |= b; *dest++ = RGB32k.All[a & (a>>15)]; - xfrac += xstep; yfrac += ystep; } while (--count); } } - void R_DrawSpanMaskedAddClampP_C (void) { dsfixed_t xfrac; @@ -1481,8 +1466,6 @@ void R_DrawSpanMaskedAddClampP_C (void) DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(ds_light); - xfrac = ds_xfrac; yfrac = ds_yfrac; @@ -1552,7 +1535,7 @@ void R_DrawSpanMaskedAddClampP_C (void) // [RH] Just fill a span with a color void R_FillSpan_C (void) { - memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, (ds_x2 - ds_x1 + 1)); + memset (ylookup[ds_y] + ds_x1 + dc_destorg, ds_color, ds_x2 - ds_x1 + 1); } @@ -1759,7 +1742,7 @@ DWORD vlinec1 () do { - *dest = colormap[source[frac >> bits]]; + *dest = colormap[source[frac>>bits]]; frac += fracstep; dest += pitch; } while 
(--count); @@ -1830,9 +1813,7 @@ DWORD mvlinec1 () return frac; } -#endif -#if !defined(X86_ASM) void mvlinec4 () { BYTE *dest = dc_dest; @@ -1843,6 +1824,7 @@ void mvlinec4 () do { BYTE pix; + pix = bufplce[0][(place=vplce[0])>>bits]; if(pix) dest[0] = palookupoffse[0][pix]; vplce[0] = place+vince[0]; pix = bufplce[1][(place=vplce[1])>>bits]; if(pix) dest[1] = palookupoffse[1][pix]; vplce[1] = place+vince[1]; pix = bufplce[2][(place=vplce[2])>>bits]; if(pix) dest[2] = palookupoffse[2][pix]; vplce[2] = place+vince[2]; @@ -1879,7 +1861,6 @@ static void R_DrawFogBoundaryLine (int y, int x) int x2 = spanend[y]; BYTE *colormap = dc_colormap; BYTE *dest = ylookup[y] + dc_destorg; - do { dest[x] = colormap[dest[x]]; @@ -1996,8 +1977,6 @@ fixed_t tmvline1_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; @@ -2024,12 +2003,6 @@ void tmvline4_add_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - do { for (int i = 0; i < 4; ++i) @@ -2062,8 +2035,6 @@ fixed_t tmvline1_addclamp_C () DWORD *fg2rgb = dc_srcblend; DWORD *bg2rgb = dc_destblend; - uint32_t light = calc_light_multiplier(dc_light); - do { BYTE pix = source[frac>>bits]; diff --git a/src/r_draw.h b/src/r_draw.h index cea05e469..a31183405 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -32,7 +32,20 @@ extern "C" int fuzzpos; extern "C" int fuzzviewheight; struct FColormap; -struct ShadeConstants; + +struct ShadeConstants +{ + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; +}; extern "C" int 
ylookup[MAXHEIGHT]; @@ -58,7 +71,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; -extern "C" BYTE* dc_dest, *dc_destorg; +extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; extern "C" DWORD vplce[4]; @@ -68,7 +81,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; // [RH] Temporary buffer for column drawing -extern "C" BYTE *dc_temp; +extern "C" BYTE *dc_temp; extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 9f07ff0bf..47ea75260 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -24,6 +24,7 @@ #define __R_DRAW_RGBA__ #include "r_draw.h" +#include "v_palette.h" #include #include #include @@ -273,4 +274,216 @@ public: void Execute(DrawerThread *thread) override; }; +///////////////////////////////////////////////////////////////////////////// +// Pixel shading macros and inline functions: + +// Give the compiler a strong hint we want these functions inlined: +#ifndef FORCEINLINE +#if defined(_MSC_VER) +#define FORCEINLINE __forceinline +#elif defined(__GNUC__) +#define FORCEINLINE __attribute__((always_inline)) inline +#else +#define FORCEINLINE inline +#endif +#endif + +// calculates the light constant passed to the shade_pal_index function +FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +{ + return 256 - (light >> (FRACBITS - 8)); +} + +// Calculates a ARGB8 color for the given palette index and light multiplier +FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra_simple(uint32_t 
color, uint32_t light) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap +FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) +{ + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) +{ + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 
+ blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) +{ + uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = bg & 0xff; + + uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; + uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; + uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; +} + +// Calculate constants for a simple shade +#define SSE_SHADE_SIMPLE_INIT(light) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; + +// Calculate constants for a simple shade with different light levels for each pixel +#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + +// Simple shade 4 pixels +#define SSE_SHADE_SIMPLE(fg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo 
= _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define SSE_SHADE_INIT(light, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + __m128i mlight_lo = mlight_hi; \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = fade_amount_hi; \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Calculate constants for a complex shade with different light levels for each pixel +#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + __m128i color = _mm_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m128i fade = _mm_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, 
shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + +// Complex shade 4 pixels +#define SSE_SHADE(fg, shade_constants) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + \ + __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ + intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ + \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ + fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ + \ + __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ + uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ + uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ + intensity_lo = _mm_set_epi16(intensity_lo1, 
intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ + \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ + fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index c829c2dc4..837093044 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -340,13 +340,13 @@ void rt_add1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD fg = colormap[*source]; DWORD bg = *dest; @@ -374,14 +374,13 @@ void rt_add4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD fg = colormap[source[0]]; DWORD bg = dest[0]; @@ -434,6 +433,7 @@ void rt_tlateadd4cols_c (int sx, int yl, int yh) // Shades one span at hx to the screen at sx. 
void rt_shaded1col_c (int hx, int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -445,14 +445,12 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val = colormap[*source]; DWORD fg = fgstart[val<<8]; @@ -466,6 +464,7 @@ void rt_shaded1col_c (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_c (int sx, int yl, int yh) { + DWORD *fgstart; BYTE *colormap; BYTE *source; BYTE *dest; @@ -477,14 +476,12 @@ void rt_shaded4cols_c (int sx, int yl, int yh) return; count++; + fgstart = &Col2RGB8[0][dc_color]; colormap = dc_colormap; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; - DWORD *fgstart; - fgstart = &Col2RGB8[0][dc_color]; - do { DWORD val; @@ -523,14 +520,13 @@ void rt_addclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; - do { DWORD a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; DWORD b = a; @@ -639,13 +635,13 @@ void rt_subclamp1col_c (int hx, int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4 + hx]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; DWORD b = a; @@ -674,13 +670,13 @@ void rt_subclamp4cols_c (int sx, int yl, int yh) return; count++; + DWORD *fg2rgb = dc_srcblend; + DWORD *bg2rgb = dc_destblend; dest = 
ylookup[yl] + sx + dc_destorg; source = &dc_temp[yl*4]; pitch = dc_pitch; colormap = dc_colormap; - DWORD *fg2rgb = dc_srcblend; - DWORD *bg2rgb = dc_destblend; do { DWORD a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; DWORD b = a; diff --git a/src/r_main.h b/src/r_main.h index d71d44fe1..fa8fe0bb1 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -90,229 +90,6 @@ extern bool r_dontmaplines; // Converts fixedlightlev into a shade value #define FIXEDLIGHT2SHADE(lightlev) (((lightlev) >> COLORMAPSHIFT) << FRACBITS) -struct ShadeConstants -{ - uint16_t light_alpha; - uint16_t light_red; - uint16_t light_green; - uint16_t light_blue; - uint16_t fade_alpha; - uint16_t fade_red; - uint16_t fade_green; - uint16_t fade_blue; - uint16_t desaturate; - bool simple_shade; -}; - -// calculates the light constant passed to the shade_pal_index function -inline uint32_t calc_light_multiplier(dsfixed_t light) -{ - return 256 - (light >> (FRACBITS - 8)); -} - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 
0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * 
inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = (fg >> 24) & 0xff; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculate constants for a simple shade -#define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; - -// Calculate constants for a simple shade with different light levels for each pixel -#define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); - -// Simple shade 4 pixels -#define SSE_SHADE_SIMPLE(fg) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - fg_hi = _mm_mullo_epi16(fg_hi, mlight_hi); \ - fg_hi = _mm_srli_epi16(fg_hi, 8); \ - fg_lo = _mm_mullo_epi16(fg_lo, mlight_lo); \ - fg_lo = _mm_srli_epi16(fg_lo, 8); \ - fg = 
_mm_packus_epi16(fg_lo, fg_hi); \ -} - -// Calculate constants for a complex shade -#define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Calculate constants for a complex shade with different light levels for each pixel -#define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), 
mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ - -// Complex shade 4 pixels -#define SSE_SHADE(fg, shade_constants) { \ - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ - \ - __m128i intensity_hi = _mm_mullo_epi16(fg_hi, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_hi0 = ((_mm_extract_epi16(intensity_hi, 2) + _mm_extract_epi16(intensity_hi, 1) + _mm_extract_epi16(intensity_hi, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_hi1 = ((_mm_extract_epi16(intensity_hi, 6) + _mm_extract_epi16(intensity_hi, 5) + _mm_extract_epi16(intensity_hi, 4)) >> 8) * shade_constants.desaturate; \ - intensity_hi = _mm_set_epi16(intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi1, intensity_hi0, intensity_hi0, intensity_hi0, intensity_hi0); \ - \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mlight_hi), fade_amount_hi), 8); \ - fg_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_hi, color), 8); \ - \ - __m128i intensity_lo = _mm_mullo_epi16(fg_lo, _mm_set_epi16(0, 77, 143, 37, 0, 77, 143, 37)); \ - uint16_t intensity_lo0 = ((_mm_extract_epi16(intensity_lo, 2) + _mm_extract_epi16(intensity_lo, 1) + _mm_extract_epi16(intensity_lo, 0)) >> 8) * shade_constants.desaturate; \ - uint16_t intensity_lo1 = ((_mm_extract_epi16(intensity_lo, 6) + _mm_extract_epi16(intensity_lo, 5) + _mm_extract_epi16(intensity_lo, 4)) >> 8) * shade_constants.desaturate; \ - intensity_lo = _mm_set_epi16(intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo1, intensity_lo0, intensity_lo0, intensity_lo0, intensity_lo0); \ - \ - fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, inv_desaturate), intensity_lo), 8); \ - fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mlight_lo), fade_amount_lo), 8); \ - fg_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_lo, color), 8); \ - \ - fg = _mm_packus_epi16(fg_lo, fg_hi); \ -} - extern bool r_swtruecolor; extern double GlobVis; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 1a08d1793..807066f77 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,6 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_draw_rgba.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -506,7 +507,7 @@ void R_MapTiltedPlane_rgba (int y, int x1) void R_MapColoredPlane_C (int y, int x1) { - memset (ylookup[y] + x1 + dc_destorg, ds_color, (spanend[y] - x1 + 1)); + memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } void R_MapColoredPlane_rgba(int y, int x1) @@ -1710,7 +1711,7 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t // //========================================================================== -void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) +void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { static const float ifloatpow2[16] = { @@ -1745,7 +1746,7 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a // p is the texture origin in view space // Don't add in the offsets at this stage, because doing so can result in // errors if the flat is rotated. 
- ang = M_PI * 3 / 2 - ViewAngle.Radians(); + ang = M_PI*3/2 - ViewAngle.Radians(); cosine = cos(ang), sine = sin(ang); p[0] = ViewPos.X * cosine - ViewPos.Y * sine; p[2] = ViewPos.X * sine + ViewPos.Y * cosine; @@ -1756,25 +1757,25 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a cosine = cos(ang), sine = sin(ang); m[0] = yscale * cosine; m[2] = yscale * sine; - // m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); - // VectorScale2 (m, 64.f/VectorLength(m)); +// m[1] = pl->height.ZatPointF (0, iyscale) - pl->height.ZatPointF (0,0)); +// VectorScale2 (m, 64.f/VectorLength(m)); - // n is the u direction vector in view space + // n is the u direction vector in view space #if 0 //let's use the sin/cosine we already know instead of computing new ones - ang += M_PI / 2 - n[0] = -xscale * cos(ang); + ang += M_PI/2 + n[0] = -xscale * cos(ang); n[2] = -xscale * sin(ang); #else n[0] = xscale * sine; n[2] = -xscale * cosine; #endif - // n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); - // VectorScale2 (n, 64.f/VectorLength(n)); +// n[1] = pl->height.ZatPointF (ixscale, 0) - pl->height.ZatPointF (0,0)); +// VectorScale2 (n, 64.f/VectorLength(n)); - // This code keeps the texture coordinates constant across the x,y plane no matter - // how much you slope the surface. Use the commented-out code above instead to keep - // the textures a constant size across the surface's plane instead. + // This code keeps the texture coordinates constant across the x,y plane no matter + // how much you slope the surface. Use the commented-out code above instead to keep + // the textures a constant size across the surface's plane instead. 
cosine = cos(planeang), sine = sin(planeang); m[1] = pl->height.ZatPoint(ViewPos.X + yscale * sine, ViewPos.Y + yscale * cosine) - zeroheight; n[1] = pl->height.ZatPoint(ViewPos.X - xscale * cosine, ViewPos.Y + xscale * sine) - zeroheight; @@ -1807,7 +1808,6 @@ void R_DrawTiltedPlane(visplane_t *pl, double _xscale, double _yscale, fixed_t a if (pl->height.fC() > 0) planelightfloat = -planelightfloat; - ds_light = 0; if (fixedlightlev >= 0) { R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index fbbd65b17..c4347236d 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -96,7 +96,7 @@ void FSoftwareRenderer::PrecacheTexture(FTexture *tex, int cache) else if (cache != 0) { if (r_swtruecolor) - tex->GetPixels(); + tex->GetPixelsBgra(); else tex->GetPixels (); } diff --git a/src/r_things.cpp b/src/r_things.cpp index 0858dce2f..836f58690 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2612,8 +2612,10 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) void R_DrawParticle_C (vissprite_t *vis) { + DWORD *bg2rgb; int spacing; BYTE *dest; + DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; int ycount = vis->y2 - yl + 1; @@ -2622,9 +2624,6 @@ void R_DrawParticle_C (vissprite_t *vis) R_DrawMaskedSegsBehindParticle (vis); - DWORD *bg2rgb; - DWORD fg; - // vis->renderflags holds translucency level (0-255) { fixed_t fglevel, bglevel; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 02ba591b6..6a8dad047 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -44,6 +44,7 @@ #include "r_utility.h" #ifndef NO_SWRENDER #include "r_draw.h" +#include "r_draw_rgba.h" #include "r_main.h" #include "r_things.h" #endif From 312776621e194e36f7ef1b01d36942929ff241bf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 16 Jun 2016 06:47:30 +0200 Subject: [PATCH 046/100] Added DrawerContext 
class --- src/CMakeLists.txt | 1 + src/r_bsp.cpp | 4 +- src/r_draw.cpp | 798 ++++++++++++++++++++++++++++++++++++++- src/r_draw.h | 89 ++++- src/r_draw_rgba.cpp | 169 +++++++++ src/r_draw_rgba.h | 7 + src/r_drawer_context.cpp | 464 +++++++++++++++++++++++ src/r_drawer_context.h | 123 ++++++ src/r_drawt.cpp | 22 +- src/r_drawt_rgba.cpp | 2 + src/r_main.cpp | 14 +- src/r_main.h | 16 - src/r_plane.cpp | 454 ++-------------------- src/r_plane.h | 8 - src/r_segs.cpp | 541 +++++--------------------- src/r_swrenderer.cpp | 1 + src/r_things.cpp | 382 +++++-------------- src/r_things.h | 9 +- src/v_draw.cpp | 109 +++--- 19 files changed, 1922 insertions(+), 1291 deletions(-) create mode 100644 src/r_drawer_context.cpp create mode 100644 src/r_drawer_context.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea0..49152b785 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -892,6 +892,7 @@ set( FASTMATH_PCH_SOURCES r_swrenderer.cpp r_3dfloors.cpp r_bsp.cpp + r_drawer_context.cpp r_draw.cpp r_draw_rgba.cpp r_drawt.cpp diff --git a/src/r_bsp.cpp b/src/r_bsp.cpp index 934d2d3e5..2b94b1e95 100644 --- a/src/r_bsp.cpp +++ b/src/r_bsp.cpp @@ -41,7 +41,7 @@ #include "r_local.h" #include "r_main.h" #include "r_plane.h" -#include "r_draw.h" +#include "r_drawer_context.h" #include "r_things.h" #include "r_3dfloors.h" #include "a_sharedglobal.h" @@ -545,7 +545,7 @@ void R_AddLine (seg_t *line) curline = line; // [RH] Color if not texturing line - dc_color = (((int)(line - segs) * 8) + 4) & 255; + DrawerContext::SetFlatColor((((int)(line - segs) * 8) + 4) & 255); pt1 = line->v1->fPos() - ViewPos; pt2 = line->v2->fPos() - ViewPos; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6b..19195e907 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -40,6 +42,8 @@ #include "r_data/colormaps.h" #include 
"r_plane.h" #include "r_draw_rgba.h" +#include "r_drawer_context.h" +#include "d_net.h" #include "gi.h" #include "stats.h" @@ -66,6 +70,7 @@ int ylookup[MAXHEIGHT]; BYTE *dc_destorg; } int scaledviewwidth; +DCanvas *dc_canvas; // [RH] Pointers to the different column drawers. // These get changed depending on the current @@ -97,9 +102,9 @@ void (*R_DrawSpanMaskedAddClamp)(void); void (*R_FillSpan)(void); void (*R_FillColumnHoriz)(void); void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); -void (*R_MapTiltedPlane)(int y, int x1); -void (*R_MapColoredPlane)(int y, int x1); -void (*R_DrawParticle)(vissprite_t *); +void (*R_DrawTiltedSpan)(int y, int x1, int x2); +void (*R_DrawColoredSpan)(int y, int x1, int x2); +void (*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -134,6 +139,24 @@ void (*rt_tlatesubclamp4cols)(int sx, int yl, int yh); void (*rt_tlaterevsubclamp4cols)(int sx, int yl, int yh); void (*rt_initcols)(BYTE *buffer); void (*rt_span_coverage)(int x, int start, int stop); +void (*colfunc) (void); +void (*basecolfunc) (void); +void (*fuzzcolfunc) (void); +void (*transcolfunc) (void); +void (*spanfunc) (void); +void (*hcolfunc_pre) (void); +void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +void (*hcolfunc_post4) (int sx, int yl, int yh); + +extern "C" void R_DrawTiltedPlane_ASM(int y, int x1); +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM(const BYTE *flat); +extern "C" void R_SetSpanSize_ASM(int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM(BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif // // R_DrawColumn @@ -1040,13 +1063,6 @@ const BYTE* ds_source; // just for profiling int dscount; - -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const 
BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif } //========================================================================== @@ -1076,9 +1092,8 @@ void R_SetSpanSource(const BYTE *pixels) // //========================================================================== -void R_SetSpanColormap(FDynamicColormap *colormap, int shade) +void R_SetSpanColormap() { - R_SetDSColorMapLight(colormap, 0, shade); #ifdef X86_ASM if (!r_swtruecolor && ds_colormap != ds_curcolormap) { @@ -2297,9 +2312,9 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHoriz_rgba; R_DrawFogBoundary = R_DrawFogBoundary_rgba; - R_MapTiltedPlane = R_MapTiltedPlane_rgba; - R_MapColoredPlane = R_MapColoredPlane_rgba; - R_DrawParticle = R_DrawParticle_rgba; + R_DrawTiltedSpan = R_DrawTiltedSpan_rgba; + R_DrawColoredSpan = R_DrawColoredSpan_rgba; + R_FillTransColumn = R_FillTransColumn_rgba; tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; @@ -2394,9 +2409,15 @@ void R_InitColumnDrawers () R_FillColumnHoriz = R_FillColumnHorizP_C; R_DrawFogBoundary = R_DrawFogBoundary_C; - R_MapTiltedPlane = R_MapTiltedPlane_C; - R_MapColoredPlane = R_MapColoredPlane_C; - R_DrawParticle = R_DrawParticle_C; + R_DrawColoredSpan = R_DrawColoredSpan_C; + R_FillTransColumn = R_FillTransColumn_C; + +#ifdef X86_ASM + // To do: update R_DrawTiltedPlane_ASM to use x2 rather than spanend[y] + R_DrawTiltedSpan = [](int y, int x1, int x2) { R_DrawTiltedPlane_ASM(y, x1); }; +#else + R_DrawTiltedSpan = R_DrawTiltedSpan_C; +#endif tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; @@ -2829,3 +2850,744 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); } } + 
+///////////////////////////////////////////////////////////////////////////// + +FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +bool ds_plane_shade; +float ds_planelightfloat; +fixed_t ds_pviewx, ds_pviewy; +int ds_planeshade; +extern "C" BYTE *tiltlighting[MAXWIDTH]; + +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +#ifdef _MSC_VER +#pragma warning(disable:4244) // warning C4244: conversion from 'SQWORD' to 'DWORD', possible loss of data +#endif + +//========================================================================== +// +// R_CalcTiltedLighting +// +// Calculates the lighting for one row of a tilted plane. If the definition +// of GETPALOOKUP changes, this needs to change, too. +// +//========================================================================== + +extern "C" { +void R_CalcTiltedLighting (double lval, double lend, int width) +{ + double lstep; + BYTE *lightfiller; + BYTE *basecolormapdata = ds_fcolormap->Maps; + int i = 0; + + if (width == 0 || lval == lend) + { // Constant lighting + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + } + else + { + lstep = (lend - lval) / width; + if (lval >= MAXLIGHTVIS) + { // lval starts "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lval, ds_planeshade) << COLORMAPSHIFT); + for (; i <= width && lval >= MAXLIGHTVIS; ++i) + { + tiltlighting[i] = lightfiller; + lval += lstep; + } + } + if (lend >= MAXLIGHTVIS) + { // lend ends "too bright". + lightfiller = basecolormapdata + (GETPALOOKUP(lend, ds_planeshade) << COLORMAPSHIFT); + for (; width > i && lend >= MAXLIGHTVIS; --width) + { + tiltlighting[width] = lightfiller; + lend -= lstep; + } + } + if (width > 0) + { + lval = FIXED2DBL(ds_planeshade) - lval; + lend = FIXED2DBL(ds_planeshade) - lend; + lstep = (lend - lval) / width; + if (lstep < 0) + { // Going from dark to light + if (lval < 1.) 
+ { // All bright + lightfiller = basecolormapdata; + } + else + { + if (lval >= NUMCOLORMAPS) + { // Starts beyond the dark end + BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + while (lval >= NUMCOLORMAPS && i <= width) + { + tiltlighting[i++] = clight; + lval += lstep; + } + if (i > width) + return; + } + while (i <= width && lval >= 0) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata; + } + } + else + { // Going from light to dark + if (lval >= (NUMCOLORMAPS-1)) + { // All dark + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + else + { + while (lval < 0 && i <= width) + { + tiltlighting[i++] = basecolormapdata; + lval += lstep; + } + if (i > width) + return; + while (i <= width && lval < (NUMCOLORMAPS-1)) + { + tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); + lval += lstep; + } + lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); + } + } + } + } + for (; i <= width; i++) + { + tiltlighting[i] = lightfiller; + } +} +} // extern "C" + +void R_DrawTiltedSpan_C (int y, int x1, int x2) +{ + int width = x2 - x1; + double iz, uz, vz; + BYTE *fb; + DWORD u, v; + int i; + + iz = ds_plane_sz[2] + ds_plane_sz[1]*(centery-y) + ds_plane_sz[0]*(x1-centerx); + + // Lighting is simple. It's just linear interpolation from start to end + if (ds_plane_shade) + { + uz = (iz + ds_plane_sz[0]*width) * ds_planelightfloat; + vz = iz * ds_planelightfloat; + R_CalcTiltedLighting (vz, uz, width); + } + + uz = ds_plane_su[2] + ds_plane_su[1]*(centery-y) + ds_plane_su[0]*(x1-centerx); + vz = ds_plane_sv[2] + ds_plane_sv[1]*(centery-y) + ds_plane_sv[0]*(x1-centerx); + + fb = ylookup[y] + x1 + dc_destorg; + + BYTE vshift = 32 - ds_ybits; + BYTE ushift = vshift - ds_xbits; + int umask = ((1 << ds_xbits) - 1) << ds_ybits; + +#if 0 // The "perfect" reference version of this routine. Pretty slow. 
+ // Use it only to see how things are supposed to look. + i = 0; + do + { + double z = 1.f/iz; + + u = SQWORD(uz*z) + ds_pviewx; + v = SQWORD(vz*z) + ds_pviewy; + R_SetDSColorMapLight(tiltlighting[i], 0, 0); + fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; + iz += ds_plane_sz[0]; + uz += ds_plane_su[0]; + vz += ds_plane_sv[0]; + } while (--width >= 0); +#else +//#define SPANSIZE 32 +//#define INVSPAN 0.03125f +//#define SPANSIZE 8 +//#define INVSPAN 0.125f +#define SPANSIZE 16 +#define INVSPAN 0.0625f + + double startz = 1.f/iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep, uzstep, vzstep; + + izstep = ds_plane_sz[0] * SPANSIZE; + uzstep = ds_plane_su[0] * SPANSIZE; + vzstep = ds_plane_sv[0] * SPANSIZE; + x1 = 0; + width++; + + while (width >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + DWORD stepu = SQWORD((endu - startu) * INVSPAN); + DWORD stepv = SQWORD((endv - startv) * INVSPAN); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (i = SPANSIZE-1; i >= 0; i--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + startu = endu; + startv = endv; + width -= SPANSIZE; + } + if (width > 0) + { + if (width == 1) + { + u = SQWORD(startu); + v = SQWORD(startv); + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); + } + else + { + double left = width; + iz += ds_plane_sz[0] * left; + uz += ds_plane_su[0] * left; + vz += ds_plane_sv[0] * left; + + double endz = 1.f/iz; + double endu = uz*endz; + double endv = vz*endz; + left = 1.f/left; + DWORD stepu = SQWORD((endu - startu) * left); + DWORD stepv = SQWORD((endv - startv) * left); + u = SQWORD(startu) + ds_pviewx; + v = SQWORD(startv) + ds_pviewy; + + for (; width != 0; width--) + { + fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) 
| ((u >> ushift) & umask)]); + x1++; + u += stepu; + v += stepv; + } + } + } +#endif +} + +void R_DrawColoredSpan_C (int y, int x1, int x2) +{ + memset (ylookup[y] + x1 + dc_destorg, ds_color, x2 - x1 + 1); +} + +///////////////////////////////////////////////////////////////////////////// + +// Draw a column with support for non-power-of-two ranges +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + if (uv_max == 0) // power of two + { + int count = y2 - y1; + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_start; + draw1column(); + + uint64_t step64 = uv_step; + uint64_t pos64 = uv_start; + return (uint32_t)(pos64 + step64 * count); + } + else + { + uint32_t uv_pos = uv_start; + + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = uv_max - uv_pos; + uint32_t next_uv_wrap = available / uv_step; + if (available % uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = source; + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + dc_iscale = uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += uv_step * count; + if (uv_pos >= uv_max) + uv_pos -= uv_max; + } + + return uv_pos; + } +} + +// Draw four columns with support for non-power-of-two ranges +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + if (uv_max == 0) // power of two, no wrap handling needed + { + int count = y2 - y1; + for (int i = 0; i < 4; i++) + { + bufplce[i] = source[i]; + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + + uint64_t step64 = uv_step[i]; + uint64_t pos64 = uv_pos[i]; + uv_pos[i] = (uint32_t)(pos64 + step64 * count); + } + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_count = count; + draw4columns(); + } + else + { + dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + for (int i = 0; i < 4; i++) + bufplce[i] = source[i]; + + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = uv_max - uv_pos[i]; + uint32_t next_uv_wrap = available / uv_step[i]; + if (available % uv_step[i] != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = uv_pos[i]; + vince[i] = uv_step[i]; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + uv_pos[i] += uv_step[i] * count; + if (uv_pos[i] >= uv_max) + uv_pos[i] -= uv_max; + } + + left -= count; + } + } +} + +// Calculates a wrapped uv start position value for a column +void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) +{ + double uv_stepd = swal * yrepeat; + + // Find start uv in [0-uv_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; + v = v - floor(v); + v *= uv_height; + v *= (1 << fracbits); + + uv_start_out = (uint32_t)v; + uv_step_out = xs_ToFixed(fracbits, uv_stepd); +} + +typedef DWORD(*Draw1ColumnFuncPtr)(); +typedef void(*Draw4ColumnsFuncPtr)(); + +void wallscan_any( + int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, + FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x), + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) +{ + if (rw_pic->UseType == FTexture::TEX_Null) + return; + + uint32_t uv_height = rw_pic->GetHeight(); + uint32_t fracbits = 32 - rw_pic->HeightBits; + uint32_t uv_max = uv_height << fracbits; + + DWORD(*draw1column)(); + void(*draw4columns)(); + setupwallscan(fracbits, draw1column, draw4columns); + + fixed_t xoffset = rw_offset; + + bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); + if (fixed) + { + palookupoffse[0] = dc_colormap; + palookupoffse[1] = dc_colormap; + palookupoffse[2] = dc_colormap; + palookupoffse[3] = dc_colormap; + palookuplight[0] = 0; + palookuplight[1] = 0; + palookuplight[2] = 0; + palookuplight[3] = 0; + } + + if (fixedcolormap) + R_SetColorMapLight(fixedcolormap, 0, 0); + else + R_SetColorMapLight(basecolormap, 0, 0); + + float light = rw_light; + + // Calculate where 4 column alignment begins and ends: + int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); + int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); + + // First unaligned columns: + for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, 
uv_start, uv_step, uv_max, source, draw1column); + } + + // The aligned columns + for (int x = aligned_x1; x < aligned_x2; x += 4) + { + // Find y1, y2, light and uv values for four columns: + int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; + int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; + + const BYTE *source[4]; + for (int i = 0; i < 4; i++) + source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); + + float lights[4]; + for (int i = 0; i < 4; i++) + { + lights[i] = light; + light += rw_lightstep; + } + + uint32_t uv_pos[4], uv_step[4]; + for (int i = 0; i < 4; i++) + calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + + // Figure out where we vertically can start and stop drawing 4 columns in one go + int middle_y1 = y1[0]; + int middle_y2 = y2[0]; + for (int i = 1; i < 4; i++) + { + middle_y1 = MAX(y1[i], middle_y1); + middle_y2 = MIN(y2[i], middle_y2); + } + + // If we got an empty column in our set we cannot draw 4 columns in one go: + bool empty_column_in_set = false; + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + empty_column_in_set = true; + } + + if (empty_column_in_set || middle_y2 <= middle_y1) + { + for (int i = 0; i < 4; i++) + { + if (y2[i] <= y1[i]) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + continue; + } + + // Draw the first rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (y1[i] < middle_y1) + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + + // Draw the area where all 4 columns are active + if (!fixed) + { + for (int i = 0; i < 4; i++) + { + if (r_swtruecolor) + { + palookupoffse[i] = basecolormap->Maps; + palookuplight[i] = 
LIGHTSCALE(lights[i], wallshade); + } + else + { + palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); + palookuplight[i] = 0; + } + } + } + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + + // Draw the last rows where not all 4 columns are active + for (int i = 0; i < 4; i++) + { + if (!fixed) + R_SetColorMapLight(basecolormap, lights[i], wallshade); + + if (middle_y2 < y2[i]) + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + } + } + + // The last unaligned columns: + for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) + { + int y1 = uwal[x]; + int y2 = dwal[x]; + if (y2 <= y1) + continue; + + if (!fixed) + R_SetColorMapLight(basecolormap, light, wallshade); + + const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); + + uint32_t uv_start, uv_step; + calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); + + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + } + + NetUpdate(); +} + +void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupvline(bits); + line1 = dovline1; + line4 = dovline4; + }); +} + +void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
+ { + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setupmvline(bits); + line1 = domvline1; + line4 = domvline4; + }); + } +} + +void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int x)) +{ + static fixed_t(*tmvline1)(); + static void(*tmvline4)(); + if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) + { + // The current translucency is unsupported, so draw with regular maskwallscan instead. + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol); + } + else + { + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + { + setuptmvline(bits); + line1 = reinterpret_cast(tmvline1); + line4 = tmvline4; + }); + } +} + +///////////////////////////////////////////////////////////////////////////// + +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha) +{ + fixed_t fglevel, bglevel; + DWORD *fg2rgb; + DWORD *bg2rgb; + int spacing; + BYTE *dest; + DWORD fg; + + fglevel = ((alpha + 1) << 8) & ~0x3ff; + bglevel = FRACUNIT - fglevel; + fg2rgb = Col2RGB8[fglevel >> 10]; + bg2rgb = Col2RGB8[bglevel >> 10]; + fg = fg2rgb[color]; + + spacing = dc_pitch; + + int ycount = y2 - y1 + 1; + dest = ylookup[y1] + x + dc_destorg; + for (int y = 0; y < ycount; y++) + { + DWORD bg = bg2rgb[*dest]; + bg = (fg + bg) | 0x1f07c1f; + *dest = RGB32k.All[bg & (bg >> 15)]; + dest += spacing; + } +} + +///////////////////////////////////////////////////////////////////////////// + +// +// R_DrawMaskedColumn +// Used for sprites and masked mid textures. +// Masked means: partly transparent, i.e. 
stored +// in posts/runs of opaque pixels. +// +short* dc_mfloorclip; +short* dc_mceilingclip; + +double dc_spryscale; +double dc_sprtopscreen; + +bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *span) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + const fixed_t centeryfrac = FLOAT2FIXED(CenterY); + const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); + while (span->Length != 0) + { + const int length = span->Length; + const int top = span->TopOffset; + + fixed_t texturefrac = dc_texturefrac; + fixed_t iscale = dc_iscale; + + // calculate unclipped screen coordinates for post + int yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + int yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length)) - 1; + + if (dc_sprflipvert) + { + swapvalues(yl, yh); + } + + if (yh >= dc_mfloorclip[x]) + { + yh = dc_mfloorclip[x] - 1; + } + if (yl < dc_mceilingclip[x]) + { + yl = dc_mceilingclip[x]; + } + + if (yl <= yh) + { + if (dc_sprflipvert) + { + texturefrac = (yl*iscale) - (top << FRACBITS) + - FixedMul(centeryfrac, iscale) - texturemid; + const fixed_t maxfrac = length << FRACBITS; + while (texturefrac >= maxfrac) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + while (endfrac < 0) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + else + { + texturefrac = texturemid - (top << FRACBITS) + + (yl*iscale) - FixedMul(centeryfrac - FRACUNIT, iscale); + while (texturefrac < 0) + { + if (++yl > yh) + goto nextpost; + texturefrac += iscale; + } + fixed_t endfrac = texturefrac + (yh - yl)*iscale; + const fixed_t maxfrac = length << FRACBITS; + if (yh < dc_mfloorclip[x] - 1 && endfrac < maxfrac - iscale) + { + yh++; + } + else while (endfrac >= maxfrac) + { + if (--yh < yl) + goto nextpost; + endfrac -= iscale; + } + } + + dc_yl = yl; + dc_yh = yh; + dc_x = x; + dc_texturefrac = texturefrac; + dc_iscale = iscale; + dc_source = 
column + top; + dc_count = yh - yl + 1; + dc_dest = (ylookup[yl] + x) * pixelsize + dc_destorg; + colfunc(); + } + nextpost: + span++; + } +} diff --git a/src/r_draw.h b/src/r_draw.h index a31183405..c22c958d0 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -25,6 +25,11 @@ #include "r_defs.h" +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! +#ifdef DRAWER_INTERNALS + // Spectre/Invisibility. #define FUZZTABLE 50 extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly routine @@ -71,6 +76,7 @@ extern "C" fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern DCanvas *dc_canvas; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -86,6 +92,23 @@ extern "C" unsigned int dc_tspans[4][MAXHEIGHT]; extern "C" unsigned int *dc_ctspan[4]; extern "C" unsigned int horizspans[4]; +// +// Function pointers to switch refresh/drawing functions. +// Used to select shadow mode etc. +// +extern void (*colfunc) (void); +extern void (*basecolfunc) (void); +extern void (*fuzzcolfunc) (void); +extern void (*transcolfunc) (void); +// No shadow effects on floors. +extern void (*spanfunc) (void); + +// [RH] Function pointers for the horizontal column drawers. +extern void (*hcolfunc_pre) (void); +extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); +extern void (*hcolfunc_post4) (int sx, int yl, int yh); + // [RH] Pointers to the different column and span drawers... // The span blitting interface. @@ -116,7 +139,7 @@ extern void (*R_DrawTranslatedColumn)(void); // Span drawing for rows, floor/ceiling. No Spectre effect needed. 
extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); -void R_SetSpanColormap(FDynamicColormap *colormap, int shade); +void R_SetSpanColormap(); void R_SetSpanSource(const BYTE *pixels); // Span drawing for masked textures. @@ -281,6 +304,15 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); +// vars for R_DrawMaskedColumn +extern short* dc_mfloorclip; +extern short* dc_mceilingclip; +extern double dc_spryscale; +extern double dc_sprtopscreen; +extern bool dc_sprflipvert; + +void R_DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + #ifdef X86_ASM #define R_SetupDrawSlab R_SetupDrawSlabA #define R_DrawSlab R_DrawSlabA @@ -325,12 +357,7 @@ void R_InitShadeMaps(); void R_InitFuzzTable (int fuzzoff); // [RH] Consolidate column drawer selection -enum ESPSResult -{ - DontDraw, // not useful to draw this - DoDraw0, // draw this as if r_columnmethod is 0 - DoDraw1, // draw this as if r_columnmethod is 1 -}; +enum ESPSResult; ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, DWORD color); inline ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) { @@ -353,18 +380,6 @@ extern void(*tmvline4_revsubclamp)(); // transmaskwallscan calls this to find out what column drawers to use bool R_GetTransMaskDrawers (fixed_t (**tmvline1)(), void (**tmvline4)()); -// Retrieve column data for wallscan. Should probably be removed -// to just use the texture's GetColumn() method. It just exists -// for double-layer skies. -const BYTE *R_GetColumn (FTexture *tex, int col); -void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
-void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - -// transmaskwallscan is like maskwallscan, but it can also blend to the background -void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); - // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); @@ -373,4 +388,40 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +// Retrieve column data for wallscan. Should probably be removed +// to just use the texture's GetColumn() method. It just exists +// for double-layer skies. +const BYTE *R_GetColumn (FTexture *tex, int col); +void wallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// maskwallscan is exactly like wallscan but does not draw anything where the texture is color 0. 
+void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +// transmaskwallscan is like maskwallscan, but it can also blend to the background +void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); + +extern void(*R_DrawColoredSpan)(int y, int x1, int x2); +extern void(*R_DrawTiltedSpan)(int y, int x1, int x2); + +void R_DrawTiltedSpan_C(int y, int x1, int x2); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2); +void R_DrawColoredSpan_C(int y, int x1, int x2); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + +extern FVector3 ds_plane_sz, ds_plane_su, ds_plane_sv; +extern bool ds_plane_shade; +extern float ds_planelightfloat; +extern fixed_t ds_pviewx, ds_pviewy; +extern int ds_planeshade; + +extern "C" BYTE *tiltlighting[MAXWIDTH]; +extern "C" { void R_CalcTiltedLighting(double lval, double lend, int width); } + +struct vissprite_t; +extern void(*R_FillTransColumn)(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_C(int x, int y1, int y2, int color, int alpha); +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int alpha); + +#endif + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b4..420b63dff 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -21,6 +21,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + #include #include "templates.h" @@ -3492,6 +3494,158 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t dc_light; + ShadeConstants dc_shade_constants; + const BYTE *ds_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + _y 
= y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_source = ::ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)ds_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *dc_destorg; + fixed_t ds_light; + int ds_color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + dc_destorg = ::dc_destorg; + ds_light = ::ds_light; + ds_color = ::ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(ds_light); + uint32_t color = shade_pal_index_simple(ds_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *dc_destorg; + int dc_pitch; + fixed_t ds_light; + int ds_color; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + dc_destorg = ::dc_destorg; + dc_pitch = ::dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = GPalette.BaseColors[color].d; 
+ uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a + 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); @@ -3968,3 +4122,18 @@ void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) R_DrawFogBoundarySection_rgba(t2, b2, x1); } } + +void R_DrawTiltedSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + +void R_FillTransColumn_rgba(int x, int y1, int y2, int color, int a) +{ + DrawerCommandQueue::QueueCommand(x, y1, y2, color, a); +} diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260..a91b54d74 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -23,6 +23,11 @@ #ifndef __R_DRAW_RGBA__ #define __R_DRAW_RGBA__ +// Prevents files outside the DrawerContext class getting good ideas about +// accessing the private globals. Any drawer actions should be facilitated +// via the DrawerContext class! 
+#ifdef DRAWER_INTERNALS + #include "r_draw.h" #include "v_palette.h" #include @@ -487,3 +492,5 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } #endif + +#endif diff --git a/src/r_drawer_context.cpp b/src/r_drawer_context.cpp new file mode 100644 index 000000000..3533a3e4f --- /dev/null +++ b/src/r_drawer_context.cpp @@ -0,0 +1,464 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// $Log:$ +// +// DESCRIPTION: +// The actual span/column drawing functions. +// Here find the main potential for optimization, +// e.g. inline assembly, different algorithms. 
+// +//----------------------------------------------------------------------------- + +#define DRAWER_INTERNALS + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw_rgba.h" +#include "d_net.h" +#include "r_drawer_context.h" + +#include "gi.h" +#include "stats.h" +#include "x86.h" + +#ifdef X86_ASM +extern "C" void R_SetSpanSource_ASM (const BYTE *flat); +extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); +extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); +extern "C" void R_SetTiltedSpanSource_ASM(const BYTE *flat); +extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; +#endif + +DCanvas *DrawerContext::Canvas() +{ + return dc_canvas; +} + +uint8_t DrawerContext::FlatColor() +{ + return dc_color; +} + +FColormap *DrawerContext::LightColormap() +{ + return dc_fcolormap; +} + +fixed_t DrawerContext::TextureFrac() +{ + return dc_texturefrac; +} + +fixed_t DrawerContext::TextureStep() +{ + return dc_iscale; +} + +double DrawerContext::TextureMid() +{ + return dc_texturemid; +} + +int DrawerContext::SpanXBits() +{ + return ds_xbits; +} + +int DrawerContext::SpanYBits() +{ + return ds_ybits; +} + +lighttable_t *DrawerContext::SpanLitColormap() +{ + return ds_colormap; +} + +bool DrawerContext::IsFuzzColumn() +{ + return colfunc == fuzzcolfunc; +} + +bool DrawerContext::IsFillColumn() +{ + return colfunc == R_FillColumn; +} + +bool DrawerContext::IsBaseColumn() +{ + return colfunc == basecolfunc; +} + +void DrawerContext::SetDest(int x, int y) +{ + int pixelsize = r_swtruecolor ? 
4 : 1; + dc_dest = dc_destorg + (ylookup[y] + x) * pixelsize; +} + +void DrawerContext::SetFlatColor(uint8_t color_index) +{ + dc_color = color_index; +} + +void DrawerContext::SetLight(FColormap *base_colormap, float light, int shade) +{ + R_SetColorMapLight(base_colormap, light, shade); +} + +void DrawerContext::SetX(int x) +{ + dc_x = x; +} + +void DrawerContext::SetY1(int y) +{ + dc_yl = y; +} + +void DrawerContext::SetY2(int y) +{ + dc_yh = y; +} + +void DrawerContext::SetSource(const BYTE *source) +{ + dc_source = source; +} + +void DrawerContext::SetTextureFrac(fixed_t pos) +{ + dc_texturefrac = pos; +} + +void DrawerContext::SetTextureStep(fixed_t step) +{ + dc_iscale = step; +} + +void DrawerContext::SetTextureMid(double value) +{ + dc_texturemid = value; +} + +void DrawerContext::SetDrawCount(int count) +{ + dc_count = count; +} + +void DrawerContext::SetSpanY(int y) +{ + ds_y = y; +} + +void DrawerContext::SetSpanX1(int x) +{ + ds_x1 = x; +} + +void DrawerContext::SetSpanX2(int x) +{ + ds_x2 = x; +} + +void DrawerContext::SetSpanXStep(dsfixed_t step) +{ + ds_xstep = step; +} + +void DrawerContext::SetSpanYStep(dsfixed_t step) +{ + ds_ystep = step; +} + +void DrawerContext::SetSpanXBits(int bits) +{ + ds_xbits = bits; +} + +void DrawerContext::SetSpanYBits(int bits) +{ + ds_ybits = bits; +} + +void DrawerContext::SetSpanXFrac(dsfixed_t frac) +{ + ds_xfrac = frac; +} + +void DrawerContext::SetSpanYFrac(dsfixed_t frac) +{ + ds_yfrac = frac; +} + +void DrawerContext::SetSpanLight(FColormap *base_colormap, float light, int shade) +{ + R_SetDSColorMapLight(base_colormap ? 
base_colormap : &identitycolormap, light, shade); + R_SetSpanColormap(); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, alpha, translation, color); +} + +ESPSResult DrawerContext::SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color) +{ + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); +} + +void DrawerContext::FinishSetPatchStyle() +{ + R_FinishSetPatchStyle(); +} + +void DrawerContext::SetCanvas(DCanvas *canvas) +{ + dc_canvas = canvas; + dc_destorg = canvas->GetBuffer(); + + if (r_swtruecolor != canvas->IsBgra()) + { + r_swtruecolor = canvas->IsBgra(); + R_InitColumnDrawers(); + } +} + +void DrawerContext::SetTranslationMap(lighttable_t *translation) +{ + R_SetTranslationMap(translation ? translation : identitymap); +} + +void DrawerContext::SetSpanSource(FTexture *tex) +{ + R_SetupSpanBits(tex); + if (r_swtruecolor) + ds_source = (const BYTE*)tex->GetPixelsBgra(); + else + ds_source = tex->GetPixels(); + +#ifdef X86_ASM + if (!r_swtruecolor && ds_source != ds_cursource) + { + R_SetSpanSource_ASM (ds_source); + } + if (!r_swtruecolor) + { + if (ds_source != ds_curtiltedsource) + R_SetTiltedSpanSource_ASM(ds_source); + } +#endif +} + +void DrawerContext::SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + ds_plane_sz = plane_sz; + ds_plane_su = plane_su; + ds_plane_sv = plane_sv; + ds_plane_shade = plane_shade; + ds_planelightfloat = planelightfloat; + ds_pviewx = pviewx; + ds_pviewy = pviewy; + + if (!plane_shade) + { + for (int i = 0; i < viewwidth; ++i) + { + tiltlighting[i] = DrawerContext::SpanLitColormap(); + } + } +} + +void DrawerContext::SetSlabLight(const BYTE *colormap) +{ + R_SetupDrawSlab(colormap); +} + +void DrawerContext::DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int 
dest_y) +{ + int pixelsize = r_swtruecolor ? 4 : 1; + R_DrawSlab(dx, v, dy, vi, source, (ylookup[dest_y] + dest_x) * pixelsize + dc_destorg); +} + +void DrawerContext::SetSpanStyle(fixed_t alpha, bool additive, bool masked) +{ + if (spanfunc != R_FillSpan) + { + if (masked) + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanMaskedTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanMaskedAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpanMasked; + } + } + else + { + if (alpha < OPAQUE || additive) + { + if (!additive) + { + spanfunc = R_DrawSpanTranslucent; + dc_srcblend = Col2RGB8[alpha >> 10]; + dc_destblend = Col2RGB8[(OPAQUE - alpha) >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + else + { + spanfunc = R_DrawSpanAddClamp; + dc_srcblend = Col2RGB8_LessPrecision[alpha >> 10]; + dc_destblend = Col2RGB8_LessPrecision[FRACUNIT >> 10]; + dc_srcalpha = alpha; + dc_destalpha = OPAQUE - alpha; + } + } + else + { + spanfunc = R_DrawSpan; + } + } + } +} + +void DrawerContext::RtInitCols(BYTE *buffer) +{ + rt_initcols(buffer); +} + +void DrawerContext::RtSpanCoverage(int x, int start, int stop) +{ + rt_span_coverage(x, start, stop); +} + +void DrawerContext::SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert) +{ + dc_mfloorclip = mfloorclip; + dc_mceilingclip = mceilingclip; + dc_spryscale = spryscale; + dc_sprtopscreen = sprtopscreen; + dc_sprflipvert = sprflipvert; +} + +void DrawerContext::DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans) +{ + R_DrawMaskedColumn(x, column, spans); +} + +void 
DrawerContext::DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans) +{ + dc_x = x; + R_DrawMaskedColumnHoriz(column, spans); +} + +void DrawerContext::DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) +{ + R_DrawFogBoundary(x1, x2, uclip, dclip); +} + +void DrawerContext::DrawRt4cols(int sx) +{ + rt_draw4cols(sx); +} + +void DrawerContext::DrawColumn() +{ + colfunc(); +} + +void DrawerContext::DrawSpan() +{ + spanfunc(); +} + +void DrawerContext::DrawHColumnPre() +{ + hcolfunc_pre(); +} + +void DrawerContext::DrawSimplePolySpan() +{ + R_DrawSpan(); +} + +void DrawerContext::SetBaseStyle() +{ + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; +} + +void DrawerContext::DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? getcol : R_GetColumn); +} + +void DrawerContext::DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *rw_pic, fixed_t rw_offset, const BYTE *(*getcol)(FTexture *tex, int col)) +{ + transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset, getcol ? 
getcol : R_GetColumn); +} + +void DrawerContext::DrawColoredSpan(int y, int x1, int x2) +{ + R_DrawColoredSpan(y, x1, x2); +} + +void DrawerContext::DrawTiltedSpan(int y, int x1, int x2) +{ + R_DrawTiltedSpan(y, x1, x2); +} + +void DrawerContext::FillTransColumn(int x, int y1, int y2, int color, int alpha) +{ + R_FillTransColumn(x, y1, y2, color, alpha); +} diff --git a/src/r_drawer_context.h b/src/r_drawer_context.h new file mode 100644 index 000000000..64e0bf6a0 --- /dev/null +++ b/src/r_drawer_context.h @@ -0,0 +1,123 @@ +// Emacs style mode select -*- C++ -*- +//----------------------------------------------------------------------------- +// +// $Id:$ +// +// Copyright (C) 1993-1996 by id Software, Inc. +// +// This source is available for distribution and/or modification +// only under the terms of the DOOM Source Code License as +// published by id Software. All rights reserved. +// +// The source is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License +// for more details. +// +// DESCRIPTION: +// System specific interface stuff. +// +//----------------------------------------------------------------------------- + + +#ifndef __R_DRAWER_CONTEXT__ +#define __R_DRAWER_CONTEXT__ + +#include "r_defs.h" + +// [RH] Consolidate column drawer selection +enum ESPSResult +{ + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 +}; + +struct TiltedPlaneData; + +// Immediate graphics context for column/span based software rendering. 
+class DrawerContext +{ +public: + static DCanvas *Canvas(); // dc_destorg + + static uint8_t FlatColor(); // dc_color + static FColormap *LightColormap(); // dc_fcolormap + static fixed_t TextureFrac(); // dc_texturefrac + static fixed_t TextureStep(); // dc_iscale + static double TextureMid(); // dc_texturemid + + static int SpanXBits(); // ds_xbits + static int SpanYBits(); // ds_ybits + static lighttable_t *SpanLitColormap(); // ds_colormap + + static bool IsFuzzColumn(); // colfunc == fuzzcolfunc + static bool IsFillColumn(); // colfunc == R_FillColumn + static bool IsBaseColumn(); // colfunc == basecolfunc + + static void SetCanvas(DCanvas *canvas); // dc_destorg + + static void SetFlatColor(uint8_t color_index); // dc_color + static void SetLight(FColormap *base_colormap, float light, int shade); + static void SetTranslationMap(lighttable_t *translation); + static void SetX(int x); // dc_x + static void SetY1(int y); // dc_yl + static void SetY2(int y); // dc_yh + static void SetSource(const BYTE *source); // dc_source + static void SetTextureFrac(fixed_t pos); // dc_texturefrac + static void SetTextureStep(fixed_t step); // dc_iscale + static void SetTextureMid(double value); // dc_texturemid + static void SetDest(int x, int y); // dc_dest + static void SetDrawCount(int count); // dc_count + + static void SetSpanY(int y); // ds_y + static void SetSpanX1(int x); // ds_x1 + static void SetSpanX2(int x); // ds_x2 + static void SetSpanXStep(dsfixed_t step); // ds_xstep + static void SetSpanYStep(dsfixed_t step); // ds_ystep + static void SetSpanXBits(int bits); // ds_xbits + static void SetSpanYBits(int bits); // ds_ybits + static void SetSpanXFrac(dsfixed_t frac); // ds_xfrac + static void SetSpanYFrac(dsfixed_t frac); // ds_yfrac + static void SetSpanLight(FColormap *base_colormap, float light, int shade); + static void SetSpanSource(FTexture *texture); + static void SetSpanStyle(fixed_t alpha, bool additive, bool masked); + + static void SetSlabLight(const 
BYTE *colormap); + + static ESPSResult SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, DWORD color); + static ESPSResult SetPatchStyle(FRenderStyle style, float alpha, int translation, DWORD color); + // Call this after finished drawing the current thing, in case its style was STYLE_Shade + static void SetBaseStyle(); + static void FinishSetPatchStyle(); + + static void SetMaskedColumnState(short *mfloorclip, short *mceilingclip, double spryscale, double sprtopscreen, bool sprflipvert); + static void SetTiltedSpanState(FVector3 plane_sz, FVector3 plane_su, FVector3 plane_sv, bool plane_shade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + + static void RtInitCols(BYTE *buffer); + static void RtSpanCoverage(int x, int start, int stop); + + static void DrawMaskedColumn(int x, const BYTE *column, const FTexture::Span *spans); + static void DrawMaskedColumnHoriz(int x, const BYTE *column, const FTexture::Span *spans); + + static void DrawRt4cols(int sx); + static void DrawColumn(); + static void DrawHColumnPre(); + static void DrawSpan(); + static void DrawSimplePolySpan(); + + static void DrawWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + static void DrawTransMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, FTexture *texture, fixed_t texturefrac, const BYTE *(*getcol)(FTexture *tex, int col) = nullptr); + + static void DrawColoredSpan(int y, int x1, int x2); + static void DrawTiltedSpan(int y, int x1, int x2); + + static void DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *source, int dest_x, int dest_y); + + static void 
FillTransColumn(int x, int y1, int y2, int color, int alpha); + + static void DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); +}; + +#endif diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 837093044..a74dc0133 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -38,6 +38,8 @@ ** Let's hope so. :-) */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" @@ -1128,26 +1130,26 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) const int top = span->TopOffset; // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length) - 1); + dc_yl = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * top); + dc_yh = xs_RoundToInt(dc_sprtopscreen + dc_spryscale * (top + length) - 1); - if (sprflipvert) + if (dc_sprflipvert) { swapvalues (dc_yl, dc_yh); } - if (dc_yh >= mfloorclip[dc_x]) + if (dc_yh >= dc_mfloorclip[dc_x]) { - dc_yh = mfloorclip[dc_x] - 1; + dc_yh = dc_mfloorclip[dc_x] - 1; } - if (dc_yl < mceilingclip[dc_x]) + if (dc_yl < dc_mceilingclip[dc_x]) { - dc_yl = mceilingclip[dc_x]; + dc_yl = dc_mceilingclip[dc_x]; } if (dc_yl <= dc_yh) { - if (sprflipvert) + if (dc_sprflipvert) { dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - fixed_t(CenterY * dc_iscale) - texturemid; @@ -1178,7 +1180,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) } fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) + if (dc_yh < dc_mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) { dc_yh++; } @@ -1198,7 +1200,7 @@ nextpost: span++; } - if (sprflipvert) + if (dc_sprflipvert) { unsigned int *front = horizspan[dc_x&3]; unsigned int *back = dc_ctspan[dc_x&3] - 2; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8d..4b6605b4a 
100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -34,6 +34,8 @@ ** Please see r_drawt.cpp for a description of the globals used. */ +#define DRAWER_INTERNALS + #include "templates.h" #include "doomtype.h" #include "doomdef.h" diff --git a/src/r_main.cpp b/src/r_main.cpp index 247a98125..a30aa232b 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -23,6 +23,8 @@ // //----------------------------------------------------------------------------- +#define DRAWER_INTERNALS + // HEADER FILES ------------------------------------------------------------ #include @@ -150,17 +152,6 @@ angle_t xtoviewangle[MAXWIDTH+1]; bool foggy; // [RH] ignore extralight and fullbright? int r_actualextralight; -void (*colfunc) (void); -void (*basecolfunc) (void); -void (*fuzzcolfunc) (void); -void (*transcolfunc) (void); -void (*spanfunc) (void); - -void (*hcolfunc_pre) (void); -void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post4) (int sx, int yl, int yh); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -820,6 +811,7 @@ void R_SetupBuffer () ASM_PatchPitch (); #endif } + dc_canvas = RenderTarget; dc_destorg = lineptr; for (int i = 0; i < RenderTarget->GetHeight(); i++) { diff --git a/src/r_main.h b/src/r_main.h index fa8fe0bb1..91eb5b183 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -110,22 +110,6 @@ extern FColormap* fixedcolormap; extern FSpecialColormap*realfixedcolormap; -// -// Function pointers to switch refresh/drawing functions. -// Used to select shadow mode etc. -// -extern void (*colfunc) (void); -extern void (*basecolfunc) (void); -extern void (*fuzzcolfunc) (void); -extern void (*transcolfunc) (void); -// No shadow effects on floors. -extern void (*spanfunc) (void); - -// [RH] Function pointers for the horizontal column drawers. 
-extern void (*hcolfunc_pre) (void); -extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post4) (int sx, int yl, int yh); void R_InitTextureMapping (); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77..c4d7cd59c 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -58,7 +58,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #ifdef _MSC_VER #pragma warning(disable:4244) @@ -138,8 +138,6 @@ FVector3 plane_sz, plane_su, plane_sv; float planelightfloat; bool plane_shade; fixed_t pviewx, pviewy; - -void R_DrawTiltedPlane_ASM (int y, int x1); } float yslope[MAXHEIGHT]; @@ -147,13 +145,6 @@ static fixed_t xscale, yscale; static double xstepscale, ystepscale; static double basexfrac, baseyfrac; -#ifdef X86_ASM -extern "C" void R_SetSpanSource_ASM (const BYTE *flat); -extern "C" void R_SetSpanSize_ASM (int xbits, int ybits); -extern "C" void R_SetSpanColormap_ASM (BYTE *colormap); -extern "C" void R_SetTiltedSpanSource_ASM (const BYTE *flat); -extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; -#endif void R_DrawSinglePlane (visplane_t *, fixed_t alpha, bool additive, bool masked); //========================================================================== @@ -220,304 +211,22 @@ void R_MapPlane (int y, int x1) distance = planeheight * yslope[y]; - ds_xstep = xs_ToFixed(32-ds_xbits, distance * xstepscale); - ds_ystep = xs_ToFixed(32-ds_ybits, distance * ystepscale); - ds_xfrac = xs_ToFixed(32-ds_xbits, distance * basexfrac) + pviewx; - ds_yfrac = xs_ToFixed(32-ds_ybits, distance * baseyfrac) + pviewy; + DrawerContext::SetSpanXStep(xs_ToFixed(32 - DrawerContext::SpanXBits(), distance * xstepscale)); + DrawerContext::SetSpanYStep(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * ystepscale)); + DrawerContext::SetSpanXFrac(xs_ToFixed(32 - 
DrawerContext::SpanXBits(), distance * basexfrac) + pviewx); + DrawerContext::SetSpanYFrac(xs_ToFixed(32 - DrawerContext::SpanYBits(), distance * baseyfrac) + pviewy); if (plane_shade) { // Determine lighting based on the span's distance from the viewer. - R_SetDSColorMapLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); + DrawerContext::SetSpanLight(basecolormap, GlobVis * fabs(CenterY - y), planeshade); } -#ifdef X86_ASM - if (!r_swtruecolor && ds_colormap != ds_curcolormap) - R_SetSpanColormap_ASM (ds_colormap); -#endif + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2); - ds_y = y; - ds_x1 = x1; - ds_x2 = x2; - - spanfunc (); -} - -//========================================================================== -// -// R_CalcTiltedLighting -// -// Calculates the lighting for one row of a tilted plane. If the definition -// of GETPALOOKUP changes, this needs to change, too. -// -//========================================================================== - -extern "C" { -void R_CalcTiltedLighting (double lval, double lend, int width) -{ - double lstep; - BYTE *lightfiller; - BYTE *basecolormapdata = basecolormap->Maps; - int i = 0; - - if (width == 0 || lval == lend) - { // Constant lighting - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - } - else - { - lstep = (lend - lval) / width; - if (lval >= MAXLIGHTVIS) - { // lval starts "too bright". - lightfiller = basecolormapdata + (GETPALOOKUP(lval, planeshade) << COLORMAPSHIFT); - for (; i <= width && lval >= MAXLIGHTVIS; ++i) - { - tiltlighting[i] = lightfiller; - lval += lstep; - } - } - if (lend >= MAXLIGHTVIS) - { // lend ends "too bright". 
- lightfiller = basecolormapdata + (GETPALOOKUP(lend, planeshade) << COLORMAPSHIFT); - for (; width > i && lend >= MAXLIGHTVIS; --width) - { - tiltlighting[width] = lightfiller; - lend -= lstep; - } - } - if (width > 0) - { - lval = FIXED2DBL(planeshade) - lval; - lend = FIXED2DBL(planeshade) - lend; - lstep = (lend - lval) / width; - if (lstep < 0) - { // Going from dark to light - if (lval < 1.) - { // All bright - lightfiller = basecolormapdata; - } - else - { - if (lval >= NUMCOLORMAPS) - { // Starts beyond the dark end - BYTE *clight = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - while (lval >= NUMCOLORMAPS && i <= width) - { - tiltlighting[i++] = clight; - lval += lstep; - } - if (i > width) - return; - } - while (i <= width && lval >= 0) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata; - } - } - else - { // Going from light to dark - if (lval >= (NUMCOLORMAPS-1)) - { // All dark - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - else - { - while (lval < 0 && i <= width) - { - tiltlighting[i++] = basecolormapdata; - lval += lstep; - } - if (i > width) - return; - while (i <= width && lval < (NUMCOLORMAPS-1)) - { - tiltlighting[i++] = basecolormapdata + (xs_ToInt(lval) << COLORMAPSHIFT); - lval += lstep; - } - lightfiller = basecolormapdata + ((NUMCOLORMAPS-1) << COLORMAPSHIFT); - } - } - } - } - for (; i <= width; i++) - { - tiltlighting[i] = lightfiller; - } -} -} // extern "C" - -//========================================================================== -// -// R_MapTiltedPlane -// -//========================================================================== - -void R_MapTiltedPlane_C (int y, int x1) -{ - int x2 = spanend[y]; - int width = x2 - x1; - double iz, uz, vz; - BYTE *fb; - DWORD u, v; - int i; - - iz = plane_sz[2] + plane_sz[1]*(centery-y) + plane_sz[0]*(x1-centerx); - - // Lighting is simple. 
It's just linear interpolation from start to end - if (plane_shade) - { - uz = (iz + plane_sz[0]*width) * planelightfloat; - vz = iz * planelightfloat; - R_CalcTiltedLighting (vz, uz, width); - } - - uz = plane_su[2] + plane_su[1]*(centery-y) + plane_su[0]*(x1-centerx); - vz = plane_sv[2] + plane_sv[1]*(centery-y) + plane_sv[0]*(x1-centerx); - - fb = ylookup[y] + x1 + dc_destorg; - - BYTE vshift = 32 - ds_ybits; - BYTE ushift = vshift - ds_xbits; - int umask = ((1 << ds_xbits) - 1) << ds_ybits; - -#if 0 // The "perfect" reference version of this routine. Pretty slow. - // Use it only to see how things are supposed to look. - i = 0; - do - { - double z = 1.f/iz; - - u = SQWORD(uz*z) + pviewx; - v = SQWORD(vz*z) + pviewy; - R_SetDSColorMapLight(tiltlighting[i], 0, 0); - fb[i++] = ds_colormap[ds_source[(v >> vshift) | ((u >> ushift) & umask)]]; - iz += plane_sz[0]; - uz += plane_su[0]; - vz += plane_sv[0]; - } while (--width >= 0); -#else -//#define SPANSIZE 32 -//#define INVSPAN 0.03125f -//#define SPANSIZE 8 -//#define INVSPAN 0.125f -#define SPANSIZE 16 -#define INVSPAN 0.0625f - - double startz = 1.f/iz; - double startu = uz*startz; - double startv = vz*startz; - double izstep, uzstep, vzstep; - - izstep = plane_sz[0] * SPANSIZE; - uzstep = plane_su[0] * SPANSIZE; - vzstep = plane_sv[0] * SPANSIZE; - x1 = 0; - width++; - - while (width >= SPANSIZE) - { - iz += izstep; - uz += uzstep; - vz += vzstep; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - DWORD stepu = SQWORD((endu - startu) * INVSPAN); - DWORD stepv = SQWORD((endv - startv) * INVSPAN); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (i = SPANSIZE-1; i >= 0; i--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - startu = endu; - startv = endv; - width -= SPANSIZE; - } - if (width > 0) - { - if (width == 1) - { - u = SQWORD(startu); - v = SQWORD(startv); - fb[x1] = 
*(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - } - else - { - double left = width; - iz += plane_sz[0] * left; - uz += plane_su[0] * left; - vz += plane_sv[0] * left; - - double endz = 1.f/iz; - double endu = uz*endz; - double endv = vz*endz; - left = 1.f/left; - DWORD stepu = SQWORD((endu - startu) * left); - DWORD stepv = SQWORD((endv - startv) * left); - u = SQWORD(startu) + pviewx; - v = SQWORD(startv) + pviewy; - - for (; width != 0; width--) - { - fb[x1] = *(tiltlighting[x1] + ds_source[(v >> vshift) | ((u >> ushift) & umask)]); - x1++; - u += stepu; - v += stepv; - } - } - } -#endif -} - -void R_MapTiltedPlane_rgba (int y, int x1) -{ - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } -} - -//========================================================================== -// -// R_MapColoredPlane -// -//========================================================================== - -void R_MapColoredPlane_C (int y, int x1) -{ - memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); -} - -void R_MapColoredPlane_rgba(int y, int x1) -{ - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + DrawerContext::DrawSpan(); } //========================================================================== @@ -1014,7 +723,7 @@ static void R_DrawSky (visplane_t *pl) rw_offset = 0; frontyScale = rw_pic->Scale.Y; - dc_texturemid = skymid * frontyScale; + 
DrawerContext::SetTextureMid(skymid * frontyScale); if (1 << frontskytex->HeightBits == frontskytex->GetHeight()) { // The texture tiles nicely @@ -1023,8 +732,8 @@ static void R_DrawSky (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, - frontyScale, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall (pl->left, pl->right, (short *)pl->top, (short *)pl->bottom, swall, lwall, + frontyScale, rw_pic, rw_offset, backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); } else { // The texture does not tile nicely @@ -1047,7 +756,7 @@ static void R_DrawSkyStriped (visplane_t *pl) if (topfrac < 0) topfrac += frontskytex->GetHeight(); yl = 0; yh = short((frontskytex->GetHeight() - topfrac) * frontyScale); - dc_texturemid = topfrac - iscale * (1 - CenterY); + DrawerContext::SetTextureMid(topfrac - iscale * (1 - CenterY)); while (yl < viewheight) { @@ -1061,11 +770,11 @@ static void R_DrawSkyStriped (visplane_t *pl) lastskycol[x] = 0xffffffff; lastskycol_bgra[x] = 0xffffffff; } - wallscan (pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, - backskytex == NULL ? R_GetOneSkyColumn : R_GetTwoSkyColumns); + DrawerContext::DrawWall(pl->left, pl->right, top, bot, swall, lwall, rw_pic->Scale.Y, + rw_pic, rw_offset, backskytex == NULL ? 
R_GetOneSkyColumn : R_GetTwoSkyColumns); yl = yh; yh += drawheight; - dc_texturemid = iscale * (centery-yl-1); + DrawerContext::SetTextureMid(iscale * (centery-yl-1)); } } @@ -1086,7 +795,7 @@ int R_DrawPlanes () int i; int vpcount = 0; - ds_color = 3; + DrawerContext::SetFlatColor(3); for (i = 0; i < MAXVISPLANES; i++) { @@ -1111,7 +820,7 @@ void R_DrawHeightPlanes(double height) visplane_t *pl; int i; - ds_color = 3; + DrawerContext::SetFlatColor(3); DVector3 oViewPos = ViewPos; DAngle oViewAngle = ViewAngle; @@ -1151,8 +860,8 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske if (r_drawflat) { // [RH] no texture mapping - ds_color += 4; - R_MapVisPlane (pl, R_MapColoredPlane); + DrawerContext::SetFlatColor(DrawerContext::FlatColor() + 4); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawColoredSpan(y, x1, spanend[y]); }); } else if (pl->picnum == skyflatnum) { // sky flat @@ -1175,13 +884,9 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske { // Don't waste time on a masked texture if it isn't really masked. 
masked = false; } - R_SetupSpanBits(tex); double xscale = pl->xform.xScale * tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + DrawerContext::SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); @@ -1544,13 +1249,13 @@ void R_DrawSkyPlane (visplane_t *pl) bool fakefixed = false; if (fixedcolormap) { - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } else { fakefixed = true; fixedcolormap = &NormalLight; - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); } R_DrawSky (pl); @@ -1567,13 +1272,6 @@ void R_DrawSkyPlane (visplane_t *pl) void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t alpha, bool additive, bool masked) { -#ifdef X86_ASM - if (!r_swtruecolor && ds_source != ds_cursource) - { - R_SetSpanSource_ASM (ds_source); - } -#endif - if (alpha <= 0) { return; @@ -1583,8 +1281,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t double xstep, ystep, leftxfrac, leftyfrac, rightxfrac, rightyfrac; double x; - xscale = xs_ToFixed(32 - ds_xbits, _xscale); - yscale = xs_ToFixed(32 - ds_ybits, _yscale); + xscale = xs_ToFixed(32 - DrawerContext::SpanXBits(), _xscale); + yscale = xs_ToFixed(32 - DrawerContext::SpanYBits(), _yscale); if (planeang != 0) { double cosine = cos(planeang), sine = sin(planeang); @@ -1631,15 +1329,14 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t planeheight = fabs(pl->height.Zat0() - ViewPos.Z); GlobVis = r_FloorVisibility / planeheight; - ds_light = 0; if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - 
R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else @@ -1647,61 +1344,8 @@ void R_DrawNormalPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t plane_shade = true; } - if (spanfunc != R_FillSpan) - { - if (masked) - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanMaskedTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanMaskedAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpanMasked; - } - } - else - { - if (alpha < OPAQUE || additive) - { - if (!additive) - { - spanfunc = R_DrawSpanTranslucent; - dc_srcblend = Col2RGB8[alpha>>10]; - dc_destblend = Col2RGB8[(OPAQUE-alpha)>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - else - { - spanfunc = R_DrawSpanAddClamp; - dc_srcblend = Col2RGB8_LessPrecision[alpha>>10]; - dc_destblend = Col2RGB8_LessPrecision[FRACUNIT>>10]; - dc_srcalpha = alpha; - dc_destalpha = OPAQUE - alpha; - } - } - else - { - spanfunc = R_DrawSpan; - } - } - } + DrawerContext::SetSpanStyle(alpha, additive, masked); + R_MapVisPlane (pl, R_MapPlane); } @@ -1733,14 +1377,14 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t return; } - lxscale = _xscale * ifloatpow2[ds_xbits]; - lyscale = _yscale * ifloatpow2[ds_ybits]; + lxscale = _xscale * ifloatpow2[DrawerContext::SpanXBits()]; + lyscale = _yscale * ifloatpow2[DrawerContext::SpanYBits()]; xscale = 64.f / lxscale; yscale = 64.f / lyscale; zeroheight = pl->height.ZatPoint(ViewPos); - pviewx = xs_ToFixed(32 - ds_xbits, pl->xform.xOffs * pl->xform.xScale); - pviewy = xs_ToFixed(32 - ds_ybits, pl->xform.yOffs * pl->xform.yScale); + pviewx = 
xs_ToFixed(32 - DrawerContext::SpanXBits(), pl->xform.xOffs * pl->xform.xScale); + pviewy = xs_ToFixed(32 - DrawerContext::SpanYBits(), pl->xform.yOffs * pl->xform.yScale); planeang = (pl->xform.Angle + pl->xform.baseAngle).Radians(); // p is the texture origin in view space @@ -1810,42 +1454,22 @@ void R_DrawTiltedPlane (visplane_t *pl, double _xscale, double _yscale, fixed_t if (fixedlightlev >= 0) { - R_SetDSColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetSpanLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); plane_shade = false; } else if (fixedcolormap) { - R_SetDSColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetSpanLight(fixedcolormap, 0, 0); plane_shade = false; } else { - R_SetDSColorMapLight(basecolormap, 0, 0); + DrawerContext::SetSpanLight(basecolormap, 0, 0); plane_shade = true; } - if (!plane_shade) - { - for (int i = 0; i < viewwidth; ++i) - { - tiltlighting[i] = ds_colormap; - } - } - -#if defined(X86_ASM) - if (!r_swtruecolor) - { - if (ds_source != ds_curtiltedsource) - R_SetTiltedSpanSource_ASM(ds_source); - R_MapVisPlane(pl, R_DrawTiltedPlane_ASM); - } - else - { - R_MapVisPlane(pl, R_MapTiltedPlane); - } -#else - R_MapVisPlane (pl, R_MapTiltedPlane); -#endif + DrawerContext::SetTiltedSpanState(plane_sz, plane_su, plane_sv, plane_shade, planelightfloat, pviewx, pviewy); + R_MapVisPlane (pl, [](int y, int x1) { DrawerContext::DrawTiltedSpan(y, x1, spanend[y]); }); } //========================================================================== diff --git a/src/r_plane.h b/src/r_plane.h index b199d3477..d4db3dc09 100644 --- a/src/r_plane.h +++ b/src/r_plane.h @@ -93,14 +93,6 @@ void R_DrawNormalPlane (visplane_t *pl, double xscale, double yscale, fixed_t al void R_DrawTiltedPlane (visplane_t *pl, double xscale, double yscale, fixed_t alpha, bool additive, bool masked); void R_MapVisPlane (visplane_t *pl, void (*mapfunc)(int y, int x1)); -extern void(*R_MapColoredPlane)(int y, int x1); -extern 
void(*R_MapTiltedPlane)(int y, int x1); - -void R_MapTiltedPlane_C(int y, int x1); -void R_MapTiltedPlane_rgba(int y, int x); -void R_MapColoredPlane_C(int y, int x1); -void R_MapColoredPlane_rgba(int y, int x1); - visplane_t *R_FindPlane ( const secplane_t &height, FTextureID picnum, diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f9..2476e07ac 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -52,6 +52,7 @@ #include "r_3dfloors.h" #include "v_palette.h" #include "r_data/colormaps.h" +#include "r_drawer_context.h" #define WALLYREPEAT 8 @@ -172,19 +173,19 @@ CVAR(Bool, r_drawmirrors, true, 0) float *MaskedSWall; float MaskedScaleY; -static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) +static void BlastMaskedColumn (int x, void (*blastfunc)(int x, const BYTE *pixels, const FTexture::Span *spans), FTexture *tex) { // calculate lighting if (fixedcolormap == NULL && fixedlightlev < 0) { - R_SetColorMapLight(basecolormap, rw_light, wallshade); + DrawerContext::SetLight(basecolormap, rw_light, wallshade); } - dc_iscale = xs_Fix<16>::ToFix(MaskedSWall[dc_x] * MaskedScaleY); + DrawerContext::SetTextureStep(xs_Fix<16>::ToFix(MaskedSWall[x] * MaskedScaleY)); if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; // killough 1/25/98: here's where Medusa came in, because // it implicitly assumed that the column was all one patch. @@ -194,10 +195,12 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // the Medusa effect. The fix is to construct true columns // when forming multipatched textures (see r_data.c). 
+ DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + // draw the texture const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); - blastfunc (pixels, spans); + const BYTE *pixels = tex->GetColumn (maskedtexturecol[x] >> FRACBITS, &spans); + blastfunc (x, pixels, spans); rw_light += rw_lightstep; spryscale += rw_scalestep; } @@ -243,7 +246,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] modified because we don't use user-definable translucency maps ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[curline->linedef->flags & ML_ADDTRANS ? STYLE_Add : STYLE_Translucent], (float)MIN(curline->linedef->alpha, 1.), 0, 0); if ((drawmode == DontDraw && !ds->bFogBoundary && !ds->bFakeBoundary)) @@ -295,7 +298,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // [RH] Draw fog partition if (ds->bFogBoundary) { - R_DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); + DrawerContext::DrawFogBoundary (x1, x2, mceilingclip, mfloorclip); if (ds->maskedtexturecol == -1) { goto clearfog; @@ -313,9 +316,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) rw_scalestep = ds->iscalestep; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // find positioning texheight = tex->GetScaledHeightDouble(); @@ -326,11 +329,11 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } if (curline->linedef->flags & ML_DONTPEGBOTTOM) { - dc_texturemid = MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + 
texheight; + DrawerContext::SetTextureMid(MAX(frontsector->GetPlaneTexZ(sector_t::floor), backsector->GetPlaneTexZ(sector_t::floor)) + texheight); } else { - dc_texturemid = MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling)); + DrawerContext::SetTextureMid(MIN(frontsector->GetPlaneTexZ(sector_t::ceiling), backsector->GetPlaneTexZ(sector_t::ceiling))); } rowoffset = curline->sidedef->GetTextureYOffset(side_t::mid); @@ -349,21 +352,21 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid += rowoffset - ViewPos.Z; - textop = dc_texturemid; - dc_texturemid *= MaskedScaleY; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset - ViewPos.Z); + textop = DrawerContext::TextureMid(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. 
- textop = dc_texturemid + rowoffset / MaskedScaleY - ViewPos.Z; - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + textop = DrawerContext::TextureMid() + rowoffset / MaskedScaleY - ViewPos.Z; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } if (sprflipvert) { MaskedScaleY = -MaskedScaleY; - dc_texturemid -= tex->GetHeight() << FRACBITS; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - tex->GetHeight()); } // [RH] Don't bother drawing segs that are completely offscreen @@ -438,9 +441,9 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) // draw the columns one at a time if (drawmode == DoDraw0) { - for (dc_x = x1; dc_x < x2; ++dc_x) + for (int x = x1; x < x2; ++x) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); } } else @@ -451,29 +454,29 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) if (x1 >= x2) goto clearfog; - dc_x = x1; + int x = x1; - while ((dc_x < stop) && (dc_x & 3)) + while ((x < stop) && (x & 3)) { - BlastMaskedColumn (R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } - while (dc_x < stop) + while (x < stop) { - rt_initcols(nullptr); - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); dc_x++; - BlastMaskedColumn (R_DrawMaskedColumnHoriz, tex); - rt_draw4cols (dc_x - 3); - dc_x++; + DrawerContext::RtInitCols(nullptr); + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 1, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 2, DrawerContext::DrawMaskedColumnHoriz, tex); + BlastMaskedColumn (x + 3, DrawerContext::DrawMaskedColumnHoriz, tex); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { - BlastMaskedColumn 
(R_DrawMaskedColumn, tex); - dc_x++; + BlastMaskedColumn (x, DrawerContext::DrawMaskedColumn, tex); + x++; } } } @@ -483,13 +486,13 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. - dc_texturemid = (dc_texturemid - ViewPos.Z + rowoffset) * MaskedScaleY; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z + rowoffset) * MaskedScaleY); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid = (dc_texturemid - ViewPos.Z) * MaskedScaleY + rowoffset; + DrawerContext::SetTextureMid((DrawerContext::TextureMid() - ViewPos.Z) * MaskedScaleY + rowoffset); } WallC.sz1 = ds->sz1; @@ -535,7 +538,7 @@ void R_RenderMaskedSegRange (drawseg_t *ds, int x1, int x2) } clearfog: - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); if (ds->bFakeBoundary & 3) { R_RenderFakeWallRange(ds, x1, x2); @@ -567,11 +570,11 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) fixed_t Alpha = Scale(rover->alpha, OPAQUE, 255); ESPSResult drawmode; - drawmode = R_SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], + drawmode = DrawerContext::SetPatchStyle (LegacyRenderStyles[rover->flags & FF_ADDITIVETRANS ? STYLE_Add : STYLE_Translucent], Alpha, 0, 0); if(drawmode == DontDraw) { - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); return; } @@ -613,26 +616,26 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) { rowoffset += rw_pic->GetHeight(); } - dc_texturemid = (planez - ViewPos.Z) * yscale; + DrawerContext::SetTextureMid((planez - ViewPos.Z) * yscale); if (rw_pic->bWorldPanning) { // rowoffset is added before the multiply so that the masked texture will // still be positioned in world units rather than texels. 
- dc_texturemid = dc_texturemid + rowoffset * yscale; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset * yscale); rw_offset = xs_RoundToInt(rw_offset * xscale); } else { // rowoffset is added outside the multiply so that it positions the texture // by texels instead of world units. - dc_texturemid += rowoffset; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() + rowoffset); } if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); WallC.sz1 = ds->sz1; WallC.sz2 = ds->sz2; @@ -660,7 +663,7 @@ void R_RenderFakeWall(drawseg_t *ds, int x1, int x2, F3DFloor *rover) PrepLWall (lwall, curline->sidedef->TexelLength*xscale, ds->sx1, ds->sx2); wallscan_np2_ds(ds, x1, x2, wallupper, walllower, MaskedSWall, lwall, yscale); - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } // kg3D - walls of fake floors @@ -1065,360 +1068,6 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } -// Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two - { - int count = y2 - y1; - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; - draw1column(); - - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); - } - else - { - uint32_t uv_pos = uv_start; - - uint32_t left = y2 - y1; - while (left > 0) - { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); - - dc_source = source; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_pos; - draw1column(); - - left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; - } - - return uv_pos; - } -} - -// Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - bufplce[i] = source[i]; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; - } - - left -= count; - } - } -} - -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
- double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - -typedef DWORD(*Draw1ColumnFuncPtr)(); -typedef void(*Draw4ColumnsFuncPtr)(); - -void wallscan_any( - int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) -{ - if (rw_pic->UseType == FTexture::TEX_Null) - return; - - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; - - DWORD(*draw1column)(); - void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; - - bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); - if (fixed) - { - palookupoffse[0] = dc_colormap; - palookupoffse[1] = dc_colormap; - palookupoffse[2] = dc_colormap; - palookupoffse[3] = dc_colormap; - palookuplight[0] = 0; - palookuplight[1] = 0; - palookuplight[2] = 0; - palookuplight[3] = 0; - } - - if (fixedcolormap) - R_SetColorMapLight(fixedcolormap, 0, 0); - else - R_SetColorMapLight(basecolormap, 0, 0); - - float light = rw_light; - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, 
draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - uint32_t uv_pos[4], uv_step[4]; - for (int i = 0; i < 4; i++) - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - palookupoffse[i] = basecolormap->Maps; - palookuplight[i] = LIGHTSCALE(lights[i], wallshade); - } - 
else - { - palookupoffse[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - palookuplight[i] = 0; - } - } - } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); - } - - NetUpdate (); -} - -void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupvline(bits); - line1 = dovline1; - line4 = dovline4; - }); -} - -void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - if (!rw_pic->bMasked) // Textures that aren't masked can use the faster wallscan. 
- { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setupmvline(bits); - line1 = domvline1; - line4 = domvline4; - }); - } -} - -void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) -{ - static fixed_t(*tmvline1)(); - static void(*tmvline4)(); - if (!R_GetTransMaskDrawers(&tmvline1, &tmvline4)) - { - // The current translucency is unsupported, so draw with regular maskwallscan instead. - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); - } - else - { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) - { - setuptmvline(bits); - line1 = reinterpret_cast(tmvline1); - line4 = tmvline4; - }); - } -} - void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat) { FDynamicColormap *startcolormap = basecolormap; @@ -1444,7 +1093,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi { down[j] = clamp (most3[j], up[j], dwal[j]); } - wallscan (x1, x2, up, down, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, down, swal, lwal, yrepeat, rw_pic, rw_offset); up = down; down = (down == most1) ? 
most2 : most1; } @@ -1455,7 +1104,7 @@ void wallscan_striped (int x1, int x2, short *uwal, short *dwal, float *swal, fi *lit->p_lightlevel, lit->lightsource != NULL) + r_actualextralight); } - wallscan (x1, x2, up, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall (x1, x2, up, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); basecolormap = startcolormap; wallshade = startshade; } @@ -1464,20 +1113,20 @@ static void call_wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, { if (mask) { - if (colfunc == basecolfunc) + if (DrawerContext::IsBaseColumn()) { - maskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { - transmaskwallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawTransMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } } else { if (fixedcolormap != NULL || fixedlightlev >= 0 || !(frontsector->e && frontsector->e->XFloor.lightlist.Size())) { - wallscan(x1, x2, uwal, dwal, swal, lwal, yrepeat); + DrawerContext::DrawWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, rw_pic, rw_offset); } else { @@ -1513,14 +1162,14 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t if (yrepeat >= 0) { // normal orientation: draw strips from top to bottom - partition = top - fmod(top - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = top - fmod(top - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); if (partition == top) { partition -= scaledtexheight; } up = uwal; down = most1; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition > bot) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1535,16 +1184,16 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t down = (down == most1) ? 
most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, up, dwal, swal, lwal, yrepeat, mask); } else { // upside down: draw strips from bottom to top - partition = bot - fmod(bot - dc_texturemid / yrepeat - ViewPos.Z, scaledtexheight); + partition = bot - fmod(bot - DrawerContext::TextureMid() / yrepeat - ViewPos.Z, scaledtexheight); up = most1; down = dwal; - dc_texturemid = (partition - ViewPos.Z) * yrepeat + texheight; + DrawerContext::SetTextureMid((partition - ViewPos.Z) * yrepeat + texheight); while (partition < top) { int j = OWallMost(most3, partition - ViewPos.Z, &WallC); @@ -1559,7 +1208,7 @@ void wallscan_np2(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t up = (up == most1) ? most2 : most1; } partition -= scaledtexheight; - dc_texturemid -= texheight; + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - texheight); } call_wallscan(x1, x2, uwal, down, swal, lwal, yrepeat, mask); } @@ -1612,9 +1261,9 @@ void R_RenderSegLoop () fixed_t xoffset = rw_offset; if (fixedlightlev >= 0) - R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); // clip wall to the floor and ceiling for (x = x1; x < x2; ++x) @@ -1695,7 +1344,7 @@ void R_RenderSegLoop () { // one sided line if (midtexture->UseType != FTexture::TEX_Null && viewactive) { - dc_texturemid = rw_midtexturemid; + DrawerContext::SetTextureMid(rw_midtexturemid); rw_pic = midtexture; xscale = rw_pic->Scale.X * rw_midtexturescalex; yscale = rw_pic->Scale.Y * rw_midtexturescaley; @@ -1738,7 +1387,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_toptexturemid; + DrawerContext::SetTextureMid(rw_toptexturemid); rw_pic = toptexture; xscale = 
rw_pic->Scale.X * rw_toptexturescalex; yscale = rw_pic->Scale.Y * rw_toptexturescaley; @@ -1784,7 +1433,7 @@ void R_RenderSegLoop () } if (viewactive) { - dc_texturemid = rw_bottomtexturemid; + DrawerContext::SetTextureMid(rw_bottomtexturemid); rw_pic = bottomtexture; xscale = rw_pic->Scale.X * rw_bottomtexturescalex; yscale = rw_pic->Scale.Y * rw_bottomtexturescaley; @@ -2974,7 +2623,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } yscale = decal->ScaleY; - dc_texturemid = WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale; + DrawerContext::SetTextureMid(WallSpriteTile->TopOffset + (zpos - ViewPos.Z) / yscale); // Clip sprite to drawseg x1 = MAX(clipper->x1, x1); @@ -3011,11 +2660,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, rw_light = rw_lightleft + (x1 - WallC.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (decal->RenderFlags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -3024,7 +2673,7 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { sprflipvert = true; yscale = -yscale; - dc_texturemid -= WallSpriteTile->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - WallSpriteTile->GetHeight()); } else { @@ -3034,10 +2683,9 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, MaskedScaleY = float(1 / yscale); do { - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, decal->Translation, decal->AlphaColor); + mode = DrawerContext::SetPatchStyle (decal->RenderStyle, (float)decal->Alpha, 
decal->Translation, decal->AlphaColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -3053,48 +2701,50 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - R_WallSpriteColumn (R_DrawMaskedColumnHoriz); - dc_x++; + R_WallSpriteColumn (x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols (x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, wallshade); + DrawerContext::SetLight(usecolormap, rw_light, wallshade); } - R_WallSpriteColumn (R_DrawMaskedColumn); - dc_x++; + R_WallSpriteColumn (x, DrawerContext::DrawMaskedColumn); + x++; } } @@ -3103,14 +2753,11 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, // needrepeat will be 0, and the while will fail. 
mceilingclip = floorclip; mfloorclip = wallbottom; - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); } while (needrepeat--); - colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; - - R_FinishSetPatchStyle (); + DrawerContext::SetBaseStyle(); + DrawerContext::FinishSetPatchStyle (); done: WallC = savecoord; } diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236d..480cdd02b 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -32,6 +32,7 @@ ** */ +#define DRAWER_INTERNALS #include "r_local.h" #include "v_palette.h" diff --git a/src/r_things.cpp b/src/r_things.cpp index 836f58690..b856b968a 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -58,7 +58,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "v_palette.h" #include "r_data/r_translate.h" #include "r_data/colormaps.h" @@ -229,12 +229,6 @@ vissprite_t *R_NewVisSprite (void) return *(vissprite_p-1); } -// -// R_DrawMaskedColumn -// Used for sprites and masked mid textures. -// Masked means: partly transparent, i.e. stored -// in posts/runs of opaque pixels. -// short* mfloorclip; short* mceilingclip; @@ -243,88 +237,6 @@ double sprtopscreen; bool sprflipvert; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) -{ - int pixelsize = r_swtruecolor ? 
4 : 1; - const fixed_t centeryfrac = FLOAT2FIXED(CenterY); - const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); - while (span->Length != 0) - { - const int length = span->Length; - const int top = span->TopOffset; - - // calculate unclipped screen coordinates for post - dc_yl = xs_RoundToInt(sprtopscreen + spryscale * top); - dc_yh = xs_RoundToInt(sprtopscreen + spryscale * (top + length)) - 1; - - if (sprflipvert) - { - swapvalues (dc_yl, dc_yh); - } - - if (dc_yh >= mfloorclip[dc_x]) - { - dc_yh = mfloorclip[dc_x] - 1; - } - if (dc_yl < mceilingclip[dc_x]) - { - dc_yl = mceilingclip[dc_x]; - } - - if (dc_yl <= dc_yh) - { - if (sprflipvert) - { - dc_texturefrac = (dc_yl*dc_iscale) - (top << FRACBITS) - - FixedMul (centeryfrac, dc_iscale) - texturemid; - const fixed_t maxfrac = length << FRACBITS; - while (dc_texturefrac >= maxfrac) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - while (endfrac < 0) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - else - { - dc_texturefrac = texturemid - (top << FRACBITS) - + (dc_yl*dc_iscale) - FixedMul (centeryfrac-FRACUNIT, dc_iscale); - while (dc_texturefrac < 0) - { - if (++dc_yl > dc_yh) - goto nextpost; - dc_texturefrac += dc_iscale; - } - fixed_t endfrac = dc_texturefrac + (dc_yh-dc_yl)*dc_iscale; - const fixed_t maxfrac = length << FRACBITS; - if (dc_yh < mfloorclip[dc_x]-1 && endfrac < maxfrac - dc_iscale) - { - dc_yh++; - } - else while (endfrac >= maxfrac) - { - if (--dc_yh < dc_yl) - goto nextpost; - endfrac -= dc_iscale; - } - } - dc_source = column + top; - dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; - dc_count = dc_yh - dc_yl + 1; - colfunc (); - } -nextpost: - span++; - } -} - // [ZZ] // R_ClipSpriteColumnWithPortals // @@ -361,7 +273,7 @@ static inline void R_CollectPortals() } } -static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) +bool 
R_ClipSpriteColumnWithPortals(int x, vissprite_t* spr) { // [ZZ] 10.01.2016: don't clip sprites from the root of a skybox. if (CurrentPortalInSkybox) @@ -380,7 +292,7 @@ static inline bool R_ClipSpriteColumnWithPortals(vissprite_t* spr) continue; // now if current column is covered by this drawseg, we clip it away - if ((dc_x >= seg->x1) && (dc_x < seg->x2)) + if ((x >= seg->x1) && (x < seg->x2)) return true; } @@ -409,15 +321,15 @@ void R_DrawVisSprite (vissprite_t *vis) } fixed_t centeryfrac = FLOAT2FIXED(CenterY); - R_SetColorMapLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(vis->Style.BaseColormap, 0, vis->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); + mode = DrawerContext::SetPatchStyle (vis->Style.RenderStyle, vis->Style.Alpha, vis->Translation, vis->FillColor); if (vis->Style.RenderStyle == LegacyRenderStyles[STYLE_Shaded]) { // For shaded sprites, R_SetPatchStyle sets a dc_colormap to an alpha table, but // it is the brightest one. We need to get back to the proper light level for // this sprite. 
- R_SetColorMapLight(dc_fcolormap, 0, vis->Style.ColormapNum << FRACBITS); + DrawerContext::SetLight(DrawerContext::LightColormap(), 0, vis->Style.ColormapNum << FRACBITS); } if (mode != DontDraw) @@ -436,65 +348,67 @@ void R_DrawVisSprite (vissprite_t *vis) tex = vis->pic; spryscale = vis->yscale; sprflipvert = false; - dc_iscale = FLOAT2FIXED(1 / vis->yscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / vis->yscale)); frac = vis->startfrac; xiscale = vis->xiscale; - dc_texturemid = vis->texturemid; + DrawerContext::SetTextureMid(vis->texturemid); if (vis->renderflags & RF_YFLIP) { sprflipvert = true; spryscale = -spryscale; - dc_iscale = -dc_iscale; - dc_texturemid -= vis->pic->GetHeight(); - sprtopscreen = CenterY + dc_texturemid * spryscale; + DrawerContext::SetTextureStep(-DrawerContext::TextureStep()); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - vis->pic->GetHeight()); + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; } else { sprflipvert = false; - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; } - dc_x = vis->x1; + int x = vis->x1; x2 = vis->x2; - if (dc_x < x2) + if (x < x2) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumnHoriz 
(pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x + zz, vis)) + DrawerContext::DrawMaskedColumnHoriz (x + zz, pixels, spans); frac += xiscale; } - rt_draw4cols (dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { pixels = tex->GetColumn (frac >> FRACBITS, &spans); - if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) - R_DrawMaskedColumn (pixels, spans); - dc_x++; + if (ispsprite || !R_ClipSpriteColumnWithPortals(x, vis)) + DrawerContext::DrawMaskedColumn (x, pixels, spans); + x++; frac += xiscale; } } } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); NetUpdate (); } @@ -511,7 +425,7 @@ void R_DrawWallSprite(vissprite_t *spr) WallT.InitFromWallCoords(&spr->wallc); PrepWall(swall, lwall, spr->pic->GetWidth() << FRACBITS, x1, x2); iyscale = 1 / spr->yscale; - dc_texturemid = (spr->gzt - ViewPos.Z) * iyscale; + DrawerContext::SetTextureMid((spr->gzt - ViewPos.Z) * iyscale); if (spr->renderflags & RF_XFLIP) { int right = (spr->pic->GetWidth() << FRACBITS) - 1; @@ -539,11 +453,11 @@ void R_DrawWallSprite(vissprite_t *spr) rw_lightstep = float((GlobVis / spr->wallc.sz2 - rw_lightleft) / (spr->wallc.sx2 - spr->wallc.sx1)); rw_light = rw_lightleft + (x1 - spr->wallc.sx1) * rw_lightstep; if (fixedlightlev >= 0) - R_SetColorMapLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + DrawerContext::SetLight(usecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); else if (fixedcolormap != NULL) - R_SetColorMapLight(fixedcolormap, 0, 0); + DrawerContext::SetLight(fixedcolormap, 0, 0); else if (!foggy && (spr->renderflags & RF_FULLBRIGHT)) - R_SetColorMapLight(usecolormap, 0, 0); + DrawerContext::SetLight(usecolormap, 0, 0); else calclighting = true; @@ -553,7 +467,7 @@ void R_DrawWallSprite(vissprite_t *spr) { sprflipvert = true; iyscale = -iyscale; - dc_texturemid -= spr->pic->GetHeight(); + DrawerContext::SetTextureMid(DrawerContext::TextureMid() - spr->pic->GetHeight()); } else { @@ 
-562,10 +476,9 @@ void R_DrawWallSprite(vissprite_t *spr) MaskedScaleY = (float)iyscale; - dc_x = x1; ESPSResult mode; - mode = R_SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + mode = DrawerContext::SetPatchStyle (spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); // R_SetPatchStyle can modify basecolormap. if (rereadcolormap) @@ -581,71 +494,74 @@ void R_DrawWallSprite(vissprite_t *spr) { int stop4; + int x = x1; + if (mode == DoDraw0) { // 1 column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1 { // up to 4 columns at a time stop4 = x2 & ~3; } - while ((dc_x < stop4) && (dc_x & 3)) + while ((x < stop4) && (x & 3)) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } - while (dc_x < stop4) + while (x < stop4) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumnHoriz); - dc_x++; + if (!R_ClipSpriteColumnWithPortals(x + zz, spr)) + R_WallSpriteColumn(x + zz, DrawerContext::DrawMaskedColumnHoriz); } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2) + while (x < x2) { if (calclighting) { // calculate lighting - R_SetColorMapLight(usecolormap, rw_light, shade); + DrawerContext::SetLight(usecolormap, rw_light, shade); } - if (!R_ClipSpriteColumnWithPortals(spr)) - R_WallSpriteColumn(R_DrawMaskedColumn); - dc_x++; + if 
(!R_ClipSpriteColumnWithPortals(x, spr)) + R_WallSpriteColumn(x, DrawerContext::DrawMaskedColumn); + x++; } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); } -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)) +void R_WallSpriteColumn (int x, void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)) { - float iscale = swall[dc_x] * MaskedScaleY; - dc_iscale = FLOAT2FIXED(iscale); + float iscale = swall[x] * MaskedScaleY; + DrawerContext::SetTextureStep(FLOAT2FIXED(iscale)); spryscale = 1 / iscale; if (sprflipvert) - sprtopscreen = CenterY + dc_texturemid * spryscale; + sprtopscreen = CenterY + DrawerContext::TextureMid() * spryscale; else - sprtopscreen = CenterY - dc_texturemid * spryscale; + sprtopscreen = CenterY - DrawerContext::TextureMid() * spryscale; const BYTE *column; const FTexture::Span *spans; - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); - dc_texturefrac = 0; - drawfunc (column, spans); + column = WallSpriteTile->GetColumn (lwall[x] >> FRACBITS, &spans); + DrawerContext::SetTextureFrac(0); + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + drawfunc (x, column, spans); rw_light += rw_lightstep; } @@ -655,18 +571,18 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop int flags = 0; // Do setup for blending. 
- R_SetColorMapLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); - mode = R_SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); + DrawerContext::SetLight(spr->Style.BaseColormap, 0, spr->Style.ColormapNum << FRACBITS); + mode = DrawerContext::SetPatchStyle(spr->Style.RenderStyle, spr->Style.Alpha, spr->Translation, spr->FillColor); if (mode == DontDraw) { return; } - if (colfunc == fuzzcolfunc || colfunc == R_FillColumn) + if (DrawerContext::IsFuzzColumn() || DrawerContext::IsFillColumn()) { flags = DVF_OFFSCREEN | DVF_SPANSONLY; } - else if (colfunc != basecolfunc) + else if (!DrawerContext::IsBaseColumn()) { flags = DVF_OFFSCREEN; } @@ -692,32 +608,32 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop { if (!(flags & DVF_SPANSONLY) && (x & 3) == 0) { - rt_initcols(OffscreenColorBuffer + x * OffscreenBufferHeight); + DrawerContext::RtInitCols(OffscreenColorBuffer + x * OffscreenBufferHeight); } for (FCoverageBuffer::Span *span = OffscreenCoverageBuffer->Spans[x]; span != NULL; span = span->NextSpan) { if (flags & DVF_SPANSONLY) { - dc_x = x; - dc_yl = span->Start; - dc_yh = span->Stop - 1; - dc_count = span->Stop - span->Start; - dc_dest = (ylookup[span->Start] + x) * pixelsize + dc_destorg; - colfunc(); + DrawerContext::SetX(x); + DrawerContext::SetY1(span->Start); + DrawerContext::SetY2(span->Stop - 1); + DrawerContext::SetDrawCount(span->Stop - span->Start); + DrawerContext::SetDest(x, span->Start); + DrawerContext::DrawColumn(); } else { - rt_span_coverage(x, span->Start, span->Stop - 1); + DrawerContext::RtSpanCoverage(x, span->Start, span->Stop - 1); } } if (!(flags & DVF_SPANSONLY) && (x & 3) == 3) { - rt_draw4cols(x - 3); + DrawerContext::DrawRt4cols(x - 3); } } } - R_FinishSetPatchStyle(); + DrawerContext::FinishSetPatchStyle(); NetUpdate(); } @@ -2585,7 +2501,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, } } -static void 
R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) +void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) { const int x1 = vis->x1; const int x2 = vis->x2; @@ -2610,120 +2526,24 @@ static void R_DrawMaskedSegsBehindParticle (const vissprite_t *vis) } } -void R_DrawParticle_C (vissprite_t *vis) +void R_DrawParticle(vissprite_t *vis) { - DWORD *bg2rgb; - int spacing; - BYTE *dest; - DWORD fg; BYTE color = vis->Style.BaseColormap->Maps[(vis->Style.ColormapNum << COLORMAPSHIFT) + vis->startfrac]; int yl = vis->y1; - int ycount = vis->y2 - yl + 1; - int x1 = vis->x1; - int countbase = vis->x2 - x1; - - R_DrawMaskedSegsBehindParticle (vis); - - // vis->renderflags holds translucency level (0-255) - { - fixed_t fglevel, bglevel; - DWORD *fg2rgb; - - fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - bglevel = FRACUNIT-fglevel; - fg2rgb = Col2RGB8[fglevel>>10]; - bg2rgb = Col2RGB8[bglevel>>10]; - fg = fg2rgb[color]; - } - - /* - - spacing = RenderTarget->GetPitch() - countbase; - dest = ylookup[yl] + x1 + dc_destorg; - - do - { - int count = countbase; - do - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest++ = RGB32k.All[bg & (bg>>15)]; - } while (--count); - dest += spacing; - } while (--ycount);*/ - - // original was row-wise - // width = countbase - // height = ycount - - spacing = RenderTarget->GetPitch(); - - for (int x = x1; x < (x1+countbase); x++) - { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) - continue; - dest = ylookup[yl] + x + dc_destorg; - for (int y = 0; y < ycount; y++) - { - DWORD bg = bg2rgb[*dest]; - bg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[bg & (bg>>15)]; - dest += spacing; - } - } -} - -void R_DrawParticle_rgba(vissprite_t *vis) -{ - int spacing; - uint32_t *dest; - BYTE color = vis->Style.BaseColormap->Maps[vis->startfrac]; - int yl = vis->y1; - int ycount = vis->y2 - yl + 1; + int yh = vis->y2; int x1 = vis->x1; int countbase = vis->x2 - x1; R_DrawMaskedSegsBehindParticle(vis); - - 
DrawerCommandQueue::WaitForWorkers(); - - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; // vis->renderflags holds translucency level (0-255) - fixed_t fglevel = ((vis->renderflags + 1) << 8) & ~0x3ff; - uint32_t alpha = fglevel * 256 / FRACUNIT; - uint32_t inv_alpha = 256 - alpha; - - fg_red *= alpha; - fg_green *= alpha; - fg_blue *= alpha; - - spacing = RenderTarget->GetPitch(); + int alpha = vis->renderflags; for (int x = x1; x < (x1 + countbase); x++) { - dc_x = x; - if (R_ClipSpriteColumnWithPortals(vis)) + if (R_ClipSpriteColumnWithPortals(x, vis)) continue; - dest = ylookup[yl] + x + (uint32_t*)dc_destorg; - for (int y = 0; y < ycount; y++) - { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += spacing; - } + DrawerContext::FillTransColumn(x, yl, yh, color, alpha); } } @@ -2769,9 +2589,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); - - int pixelsize = r_swtruecolor ? 4 : 1; + DrawerContext::SetSlabLight(colormap); // Select mip level i = abs(DMulScale6(dasprx - globalposx, cosang, daspry - globalposy, sinang)); @@ -3026,25 +2844,25 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, if (!(flags & DVF_OFFSCREEN)) { // Draw directly to the screen. 
- R_DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, (ylookup[z1] + lxt + xxl) * pixelsize + dc_destorg); + DrawerContext::DrawSlab(xxr - xxl, yplc[xxl], z2 - z1, yinc, col, lxt + xxl, z1); } else { // Record the area covered and possibly draw to an offscreen buffer. - dc_yl = z1; - dc_yh = z2 - 1; - dc_count = z2 - z1; - dc_iscale = yinc; + DrawerContext::SetY1(z1); + DrawerContext::SetY2(z2 - 1); + DrawerContext::SetDrawCount(z2 - z1); + DrawerContext::SetTextureStep(yinc); for (int x = xxl; x < xxr; ++x) { OffscreenCoverageBuffer->InsertSpan(lxt + x, z1, z2); if (!(flags & DVF_SPANSONLY)) { - dc_x = lxt + x; - rt_initcols(OffscreenColorBuffer + (dc_x & ~3) * OffscreenBufferHeight); - dc_source = col; - dc_texturefrac = yplc[xxl]; - hcolfunc_pre(); + DrawerContext::RtInitCols(OffscreenColorBuffer + ((lxt + x) & ~3) * OffscreenBufferHeight); + DrawerContext::SetX(lxt + x); + DrawerContext::SetSource(col); + DrawerContext::SetTextureFrac(yplc[xxl]); + DrawerContext::DrawHColumnPre(); } } } diff --git a/src/r_things.h b/src/r_things.h index f5cd30e00..869de4da2 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -96,11 +96,8 @@ struct vissprite_t struct particle_t; -extern void(*R_DrawParticle)(vissprite_t *); -void R_DrawParticle_C (vissprite_t *); -void R_DrawParticle_rgba (vissprite_t *); - void R_ProjectParticle (particle_t *, const sector_t *sector, int shade, int fakeside); +void R_DrawParticle (vissprite_t *vis); extern int MaxVisSprites; @@ -112,7 +109,6 @@ extern vissprite_t **vissprite_p; extern short zeroarray[MAXWIDTH]; extern short screenheightarray[MAXWIDTH]; -// vars for R_DrawMaskedColumn extern short* mfloorclip; extern short* mceilingclip; extern double spryscale; @@ -126,8 +122,7 @@ extern double pspriteyscale; extern FTexture *WallSpriteTile; -void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *spans); -void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Span *spans)); +void R_WallSpriteColumn (int x, 
void (*drawfunc)(int x, const BYTE *column, const FTexture::Span *spans)); void R_CacheSprite (spritedef_t *sprite); void R_SortVisSprites (int (*compare)(const void *, const void *), size_t first); diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad047..21cbd1a33 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -43,8 +43,7 @@ #include "r_defs.h" #include "r_utility.h" #ifndef NO_SWRENDER -#include "r_draw.h" -#include "r_draw_rgba.h" +#include "r_drawer_context.h" #include "r_main.h" #include "r_things.h" #endif @@ -130,12 +129,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) static short bottomclipper[MAXWIDTH], topclipper[MAXWIDTH]; const BYTE *translation = NULL; - if (r_swtruecolor != IsBgra()) + DCanvas *destorgsave = DrawerContext::Canvas(); + if (screen->GetBuffer() == NULL) { - r_swtruecolor = IsBgra(); - R_InitColumnDrawers(); + I_FatalError("Attempt to write to buffer of hardware canvas"); } + DrawerContext::SetCanvas(screen); + if (parms.masked) { spanptr = &spans; @@ -172,22 +173,15 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) if (translation != NULL) { - R_SetTranslationMap((lighttable_t *)translation); + DrawerContext::SetTranslationMap((lighttable_t *)translation); } else { - R_SetTranslationMap(identitymap); + DrawerContext::SetTranslationMap(nullptr); } - fixedcolormap = dc_fcolormap; - ESPSResult mode = R_SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); - - BYTE *destorgsave = dc_destorg; - dc_destorg = screen->GetBuffer(); - if (dc_destorg == NULL) - { - I_FatalError("Attempt to write to buffer of hardware canvas"); - } + fixedcolormap = DrawerContext::LightColormap(); + ESPSResult mode = DrawerContext::SetPatchStyle (parms.style, parms.Alpha, 0, parms.fillcolor); double x0 = parms.x - parms.left * parms.destwidth / parms.texwidth; double y0 = parms.y - parms.top * parms.destheight / parms.texheight; @@ -220,11 +214,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms 
&parms) assert(spryscale > 0); sprflipvert = false; - //dc_iscale = FLOAT2FIXED(iyscale); - //dc_texturemid = (-y0) * iyscale; - //dc_iscale = 0xffffffffu / (unsigned)spryscale; - dc_iscale = FLOAT2FIXED(1 / spryscale); - dc_texturemid = (CenterY - 1 - sprtopscreen) * dc_iscale / 65536; + //DrawerContext::SetTextureStep(FLOAT2FIXED(iyscale)); + //DrawerContext::SetTextureMid((-y0) * iyscale); + //DrawerContext::SetTextureStep(0xffffffffu / (unsigned)spryscale); + DrawerContext::SetTextureStep(FLOAT2FIXED(1 / spryscale)); + DrawerContext::SetTextureMid((CenterY - 1 - sprtopscreen) * DrawerContext::TextureStep() / 65536); fixed_t frac = 0; double xiscale = img->GetWidth() / parms.destwidth; double x2 = x0 + parms.destwidth; @@ -278,14 +272,14 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) mode = DoDraw0; } - dc_x = int(x0); + int x = int(x0); int x2_i = int(x2); fixed_t xiscale_i = FLOAT2FIXED(xiscale); if (mode == DoDraw0) { // One column at a time - stop4 = dc_x; + stop4 = x; } else // DoDraw1` { @@ -293,42 +287,44 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) stop4 = x2_i & ~3; } - if (dc_x < x2_i) + if (x < x2_i) { - while ((dc_x < stop4) && (dc_x & 3)) + DrawerContext::SetMaskedColumnState(mfloorclip, mceilingclip, spryscale, sprtopscreen, sprflipvert); + + while ((x < stop4) && (x & 3)) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } - while (dc_x < stop4) + while (x < stop4) { - rt_initcols(nullptr); - for (int zz = 4; zz; --zz) + DrawerContext::RtInitCols(nullptr); + for (int zz = 0; zz < 4; ++zz) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumnHoriz(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumnHoriz(x + zz, pixels, spans); frac += xiscale_i; } - rt_draw4cols(dc_x - 4); + DrawerContext::DrawRt4cols(x); + x += 4; } - while (dc_x < x2_i) + while (x < 
x2_i) { pixels = img->GetColumn(frac >> FRACBITS, spanptr); - R_DrawMaskedColumn(pixels, spans); - dc_x++; + DrawerContext::DrawMaskedColumn(x, pixels, spans); + x++; frac += xiscale_i; } } CenterY = centeryback; } - R_FinishSetPatchStyle (); + DrawerContext::FinishSetPatchStyle (); - dc_destorg = destorgsave; + DrawerContext::SetCanvas(destorgsave); if (ticdup != 0 && menuactive == MENU_Off) { @@ -1024,9 +1020,11 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) if (IsBgra()) { + int inv_level = 64 - level; + uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = GPalette.BaseColors[basecolor].d; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1035,9 +1033,9 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) uint32_t bg_green = (*spot >> 8) & 0xff; uint32_t bg_blue = (*spot) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red * level + bg_red * inv_level + 1) / 64; + uint32_t green = (fg_green * level + bg_green * inv_level + 1) / 64; + uint32_t blue = (fg_blue * level + bg_blue * inv_level + 1) / 64; *spot = 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -1399,16 +1397,15 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, sinrot = sin(rotation.Radians()); // Setup constant texture mapping parameters. - R_SetupSpanBits(tex); if (colormap) - R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); + DrawerContext::SetSpanLight(colormap, 0, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else - R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); - scalex = double(1u << (32 - ds_xbits)) / scalex; - scaley = double(1u << (32 - ds_ybits)) / scaley; - ds_xstep = xs_RoundToInt(cosrot * scalex); - ds_ystep = xs_RoundToInt(sinrot * scaley); + DrawerContext::SetSpanLight(nullptr, 0, 0); + DrawerContext::SetSpanSource(tex); + scalex = double(1u << (32 - DrawerContext::SpanXBits())) / scalex; + scaley = double(1u << (32 - DrawerContext::SpanYBits())) / scaley; + DrawerContext::SetSpanXStep(xs_RoundToInt(cosrot * scalex)); + DrawerContext::SetSpanYStep(xs_RoundToInt(sinrot * scaley)); // Travel down the right edge and create an outline of that edge. pt1 = toppt; @@ -1472,9 +1469,9 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, #if 0 memset(this->Buffer + y * this->Pitch + x1, (int)tex, x2 - x1); #else - ds_y = y; - ds_x1 = x1; - ds_x2 = x2 - 1; + DrawerContext::SetSpanY(y); + DrawerContext::SetSpanX1(x1); + DrawerContext::SetSpanX2(x2 - 1); DVector2 tex(x1 - originx, y - originy); if (dorotate) @@ -1483,10 +1480,10 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, tex.X = t * cosrot - tex.Y * sinrot; tex.Y = tex.Y * cosrot + t * sinrot; } - ds_xfrac = xs_RoundToInt(tex.X * scalex); - ds_yfrac = xs_RoundToInt(tex.Y * scaley); + DrawerContext::SetSpanXFrac(xs_RoundToInt(tex.X * scalex)); + DrawerContext::SetSpanYFrac(xs_RoundToInt(tex.Y * scaley)); - R_DrawSpan(); + DrawerContext::DrawSimplePolySpan(); #endif } x += xinc; From 70dbde4f78a99f167c3dcfc41491cb3f5cfc6cc8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 07:38:05 +0200 Subject: [PATCH 047/100] Added r_multithreaded CVAR and fixed some broken colors --- src/r_draw_rgba.cpp | 2057 +++++++++++++++++++++--------------------- src/r_draw_rgba.h | 8 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1314 insertions(+), 1291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 420b63dff..3e4bf241a 100644 
--- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -229,28 +231,28 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -260,28 +262,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. 
- dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -296,20 +298,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -317,18 +319,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * 
thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -341,20 +343,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -362,14 +364,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -392,20 +394,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ 
-413,14 +415,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -443,20 +445,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -464,14 +466,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -494,20 +496,20 @@ public: class 
FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -515,14 +517,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -545,22 +547,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -571,24 +573,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. 
- if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -622,32 +624,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -657,25 +659,25 @@ 
public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -703,28 +705,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + 
_light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -734,23 +736,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -764,28 +766,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - 
dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -795,25 +801,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -842,28 +848,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; 
- int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -872,25 +878,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -915,30 +921,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - 
fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -948,23 +954,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + 
uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -991,32 +997,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1026,24 +1032,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = 
dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1070,30 +1076,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1103,23 +1109,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = 
thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1146,32 +1152,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; 
+ _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1181,24 +1187,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1225,30 +1231,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: 
DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1258,22 +1264,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1300,32 +1306,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; 
- int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1335,24 +1341,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * 
thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1379,42 +1385,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1422,24 +1428,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = 
_yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1458,9 +1464,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1479,7 +1485,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1487,24 +1493,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1604,9 +1610,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1700,42 +1706,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1743,24 +1749,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1780,9 +1786,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1803,42 +1809,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1846,27 +1856,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1894,9 +1904,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1925,42 +1935,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1968,27 +1982,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2021,9 +2035,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2057,42 +2071,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2100,27 +2118,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2148,9 +2166,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2179,42 +2197,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2222,27 +2244,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2275,9 +2297,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2311,33 +2333,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2345,45 +2367,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2396,10 +2418,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2409,10 +2431,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2426,12 +2448,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2440,11 +2462,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2463,24 +2485,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2545,45 +2567,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2600,10 +2622,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2613,10 +2635,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2630,12 +2652,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2644,11 +2666,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2668,12 +2690,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2681,11 +2703,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2756,52 +2778,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2831,12 +2853,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2846,12 +2868,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2864,12 +2886,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2878,14 
+2900,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2923,52 +2945,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2998,12 +3020,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3013,12 +3035,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3031,12 +3053,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3045,14 +3067,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3090,52 +3112,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3165,12 +3187,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3180,12 +3202,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3198,12 +3220,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3212,14 +3234,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3257,52 +3279,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3332,12 +3354,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3347,12 +3369,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3365,12 +3387,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3379,14 +3401,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3427,9 +3449,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3438,9 +3460,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3452,10 +3474,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3499,10 +3521,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - const BYTE *ds_source; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ 
-3511,8 +3533,8 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_source = ::ds_source; + _destorg = dc_destorg; + _source = ds_source; } void Execute(DrawerThread *thread) override @@ -3527,8 +3549,8 @@ public: // Slopes are broken currently in master. // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. - uint32_t *source = (uint32_t*)ds_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = x2 - x1 + 1; while (count > 0) @@ -3544,9 +3566,9 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + fixed_t _light; + int _color; public: DrawColoredSpanRGBACommand(int y, int x1, int x2) @@ -3555,9 +3577,9 @@ public: _x1 = x1; _x2 = x2; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override @@ -3569,10 +3591,10 @@ public: int x1 = _x1; int x2 = _x2; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -3585,10 +3607,9 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *dc_destorg; - int dc_pitch; - fixed_t ds_light; - int ds_color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) @@ -3599,8 +3620,8 @@ public: _color = color; _a = a; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + 
_destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -3627,8 +3648,8 @@ public: fg_green *= alpha; fg_blue *= alpha; - int spacing = dc_pitch * thread->num_cores; - uint32_t *dest = thread->dest_for_thread(y1, dc_pitch, ylookup[y1] + x + (uint32_t*)dc_destorg); + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); for (int y = 0; y < ycount; y++) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a91b54d74..6e35de9ff 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -178,17 +178,19 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -232,7 +234,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4b6605b4a..0eabc48d8 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -61,8 +61,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -72,8 +72,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -87,9 +87,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -123,11 +123,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -137,11 +137,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -156,15 +156,15 @@ 
public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -188,11 +188,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -201,11 +201,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -221,15 +221,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 
4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -268,16 +268,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -509,13 +509,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -525,13 +525,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void 
Execute(DrawerThread *thread) override @@ -546,17 +546,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -585,13 +585,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -600,13 +600,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -622,17 +622,17 @@ public: if 
(count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -670,19 +670,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -766,11 +766,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t 
*dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -780,11 +780,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -800,13 +800,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -835,11 +835,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -848,11 +848,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; } 
#ifdef NO_SSE @@ -869,13 +869,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -914,13 +914,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -959,12 +959,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -974,12 +974,12 @@ public: 
this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -994,16 +994,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1031,12 +1031,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1045,12 +1045,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = 
::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1066,16 +1066,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1112,18 +1112,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ 
-1207,12 +1207,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1222,12 +1222,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1242,16 +1242,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1279,12 +1279,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t 
dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1293,12 +1293,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1313,16 +1313,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1355,12 +1355,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp1colRGBACommand(int hx, int sx, int 
yl, int yh) @@ -1370,12 +1370,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1390,16 +1390,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1427,12 +1427,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1441,12 +1441,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - 
dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1461,16 +1461,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1515,29 +1515,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -1546,13 +1546,13 
@@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1589,34 +1589,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From fee8650357e87ee6fc19a622133ec95d9ca9f364 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:02:41 +0200 Subject: [PATCH 048/100] Add r_multithreaded and fix color issue --- src/r_draw_rgba.cpp | 2159 +++++++++++++++++++++++------------------- src/r_draw_rgba.h | 4 +- src/r_drawt_rgba.cpp | 540 +++++------ 3 files changed, 1439 insertions(+), 1264 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2062609b4..722fbb8cd 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -56,6 +56,8 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; +CVAR(Bool, r_multithreaded, true, 0) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -227,28 +229,28 @@ void 
DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_texturefrac; - DWORD dc_iscale; - fixed_t dc_light; - const BYTE *dc_source; - int dc_pitch; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _texturefrac; + DWORD _iscale; + fixed_t _light; + const BYTE *_source; + int _pitch; + ShadeConstants _shade_constants; + BYTE *_colormap; public: DrawColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_texturefrac = ::dc_texturefrac; - dc_iscale = ::dc_iscale; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _texturefrac = dc_texturefrac; + _iscale = dc_iscale; + _light = dc_light; + _source = dc_source; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -258,28 +260,28 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); // Zero length, column does not exceed a pixel. if (count <= 0) return; // Framebuffer destination address. - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; // Determine scaling, // which is the only mapping to be done. 
- fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); // [RH] Get local copies of these variables so that the compiler // has a better chance of optimizing this well. - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - BYTE *colormap = dc_colormap; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + BYTE *colormap = _colormap; do { @@ -294,20 +296,20 @@ public: class FillColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - fixed_t dc_light; - int dc_pitch; - int dc_color; + int _count; + BYTE *_dest; + fixed_t _light; + int _pitch; + int _color; public: FillColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_light = ::dc_light; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _light = dc_light; + _pitch = dc_pitch; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -315,18 +317,18 @@ public: int count; uint32_t* dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); { - int pitch = dc_pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(dc_color, light); + int pitch = _pitch * thread->num_cores; + uint32_t color = shade_pal_index_simple(_color, light); do { @@ -339,20 +341,20 @@ public: class FillAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int 
_color; public: FillAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -360,14 +362,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -390,20 +392,20 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - fixed_t dc_light; - int dc_color; + int _count; + BYTE *_dest; + int _pitch; + fixed_t _light; + int _color; public: FillAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_color = ::dc_color; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _light = dc_light; + _color = dc_color; } void Execute(DrawerThread *thread) override @@ -411,14 +413,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - 
uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -441,20 +443,20 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -462,14 +464,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -492,20 +494,20 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - int dc_pitch; - int dc_color; - fixed_t dc_light; + int _count; + BYTE *_dest; + int _pitch; + int _color; + fixed_t _light; public: FillRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _count = dc_count; + _dest = dc_dest; + _pitch = 
dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -513,14 +515,14 @@ public: int count; uint32_t *dest; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 24) & 0xff; uint32_t fg_green = (fg >> 16) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -543,22 +545,22 @@ public: class DrawFuzzColumnRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - BYTE *dc_destorg; - int dc_pitch; + int _x; + int _yl; + int _yh; + BYTE *_destorg; + int _pitch; int fuzzpos; int fuzzviewheight; public: DrawFuzzColumnRGBACommand() { - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; + _destorg = dc_destorg; + _pitch = dc_pitch; fuzzpos = ::fuzzpos; fuzzviewheight = ::fuzzviewheight; } @@ -569,24 +571,24 @@ public: uint32_t *dest; // Adjust borders. Low... - if (dc_yl == 0) - dc_yl = 1; + if (_yl == 0) + _yl = 1; // .. and high. - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; + if (_yh > fuzzviewheight) + _yh = fuzzviewheight; - count = thread->count_for_thread(dc_yl, dc_yh - dc_yl + 1); + count = thread->count_for_thread(_yl, _yh - _yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(dc_yl, dc_pitch, ylookup[dc_yl] + dc_x + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(dc_yl)) % FUZZTABLE; + int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -620,32 +622,32 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: DrawAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -655,25 +657,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, 
(uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -701,28 +703,28 @@ public: class DrawTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; public: DrawTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + _shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; } void 
Execute(DrawerThread *thread) override @@ -732,23 +734,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; do { @@ -762,28 +764,32 @@ public: class DrawTlatedAddColumnRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; + int _count; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawTlatedAddColumnRGBACommand() { - dc_count = ::dc_count; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; + _count = dc_count; + _light = dc_light; + 
_shade_constants = dc_shade_constants; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -793,25 +799,25 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -840,28 +846,28 @@ public: class DrawShadedColumnRGBACommand : public DrawerCommand { private: - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - fixed_t dc_light; - const BYTE *dc_source; - lighttable_t *dc_colormap; - int dc_color; - int dc_pitch; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + fixed_t _light; + const BYTE *_source; + lighttable_t *_colormap; + int _color; + int _pitch; public: DrawShadedColumnRGBACommand() 
{ - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_light = ::dc_light; - dc_source = ::dc_source; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_pitch = ::dc_pitch; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _light = dc_light; + _source = dc_source; + _colormap = dc_colormap; + _color = dc_color; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -870,25 +876,25 @@ public: uint32_t *dest; fixed_t frac, fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; { - const BYTE *source = dc_source; - BYTE *colormap = dc_colormap; - int pitch = dc_pitch * thread->num_cores; + const BYTE *source = _source; + BYTE *colormap = _colormap; + int pitch = _pitch * thread->num_cores; do { @@ -913,30 +919,30 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t 
_light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -946,23 +952,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -989,32 +995,32 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - 
BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - BYTE *dc_translation; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + BYTE *_translation; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawAddClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_translation = ::dc_translation; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _translation = dc_translation; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1024,24 +1030,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = 
_translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1068,30 +1074,30 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1101,23 +1107,23 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * 
thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1144,32 +1150,32 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void 
Execute(DrawerThread *thread) override @@ -1179,24 +1185,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1223,30 +1229,30 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD _texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawRevSubClampColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - 
dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -1256,22 +1262,22 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1298,32 +1304,32 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { - int dc_count; - BYTE *dc_dest; - DWORD dc_iscale; - DWORD dc_texturefrac; - const BYTE *dc_source; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_translation; + int _count; + BYTE *_dest; + DWORD _iscale; + DWORD 
_texturefrac; + const BYTE *_source; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_translation; public: DrawRevSubClampTranslatedColumnRGBACommand() { - dc_count = ::dc_count; - dc_dest = ::dc_dest; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_translation = ::dc_translation; + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _translation = dc_translation; } void Execute(DrawerThread *thread) override @@ -1333,24 +1339,24 @@ public: fixed_t frac; fixed_t fracstep; - count = thread->count_for_thread(dc_dest_y, dc_count); + count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - fracstep = dc_iscale * thread->num_cores; - frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); + fracstep = _iscale * thread->num_cores; + frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = dc_translation; - const BYTE *source = dc_source; - int pitch = dc_pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + BYTE *translation = _translation; + const BYTE *source = _source; + int pitch = _pitch * thread->num_cores; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> 
(FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -1377,42 +1383,42 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_x1; - int ds_x2; - int ds_y; - int ds_xbits; - int ds_ybits; - BYTE *dc_destorg; - fixed_t ds_light; - ShadeConstants ds_shade_constants; + const uint32_t *_source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawSpanRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; } #ifdef NO_SSE void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1420,24 +1426,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + 
xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. @@ -1456,9 +1462,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { @@ -1477,7 +1483,7 @@ public: #else void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1485,24 +1491,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
@@ -1602,9 +1608,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; int sse_count = count / 4; count -= sse_count * 4; @@ -1698,42 +1704,42 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; public: DrawSpanMaskedRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1741,24 +1747,24 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants 
shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -1778,9 +1784,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -1801,42 +1807,46 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanTranslucentRGBACommand() { - ds_source = (const uint32_t *)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t *)ds_source; + _light = ds_light; + _shade_constants = 
ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1844,27 +1854,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -1892,9 +1902,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -1923,42 +1933,46 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedTranslucentRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -1966,27 +1980,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = 
ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2019,9 +2033,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2055,42 +2069,46 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = 
::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2098,27 +2116,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; + const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. 
do @@ -2146,9 +2164,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); @@ -2177,42 +2195,46 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *ds_source; - fixed_t ds_light; - ShadeConstants ds_shade_constants; - fixed_t ds_xfrac; - fixed_t ds_yfrac; - BYTE *dc_destorg; - int ds_x1; - int ds_x2; - int ds_y1; - int ds_y; - fixed_t ds_xstep; - fixed_t ds_ystep; - int ds_xbits; - int ds_ybits; + const uint32_t *_source; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _xfrac; + fixed_t _yfrac; + BYTE *_destorg; + int _x1; + int _x2; + int _y1; + int _y; + fixed_t _xstep; + fixed_t _ystep; + int _xbits; + int _ybits; + fixed_t _srcalpha; + fixed_t _destalpha; public: DrawSpanMaskedAddClampRGBACommand() { - ds_source = (const uint32_t*)::ds_source; - ds_light = ::ds_light; - ds_shade_constants = ::ds_shade_constants; - ds_xfrac = ::ds_xfrac; - ds_yfrac = ::ds_yfrac; - dc_destorg = ::dc_destorg; - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - ds_xstep = ::ds_xstep; - ds_ystep = ::ds_ystep; - ds_xbits = ::ds_xbits; - ds_ybits = ::ds_ybits; + _source = (const uint32_t*)ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _destorg = dc_destorg; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xstep = ds_xstep; + _ystep = ds_ystep; + _xbits = ds_xbits; + _ybits = ds_ybits; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; dsfixed_t xfrac; @@ -2220,27 +2242,27 @@ public: dsfixed_t xstep; dsfixed_t ystep; uint32_t* dest; - const uint32_t* source = ds_source; 
+ const uint32_t* source = _source; int count; int spot; - uint32_t light = calc_light_multiplier(ds_light); - ShadeConstants shade_constants = ds_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - xfrac = ds_xfrac; - yfrac = ds_yfrac; + xfrac = _xfrac; + yfrac = _yfrac; - dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - count = ds_x2 - ds_x1 + 1; + count = _x2 - _x1 + 1; - xstep = ds_xstep; - ystep = ds_ystep; + xstep = _xstep; + ystep = _ystep; - if (ds_xbits == 6 && ds_ybits == 6) + if (_xbits == 6 && _ybits == 6) { // 64x64 is the most common case by far, so special case it. do @@ -2273,9 +2295,9 @@ public: } else { - BYTE yshift = 32 - ds_ybits; - BYTE xshift = yshift - ds_xbits; - int xmask = ((1 << ds_xbits) - 1) << ds_ybits; + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; do { uint32_t texdata; @@ -2309,33 +2331,33 @@ public: class FillSpanRGBACommand : public DrawerCommand { - int ds_x1; - int ds_x2; - int ds_y; - BYTE *dc_destorg; - fixed_t ds_light; - int ds_color; + int _x1; + int _x2; + int _y; + BYTE *_destorg; + fixed_t _light; + int _color; public: FillSpanRGBACommand() { - ds_x1 = ::ds_x1; - ds_x2 = ::ds_x2; - ds_y = ::ds_y; - dc_destorg = ::dc_destorg; - ds_light = ::ds_light; - ds_color = ::ds_color; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; } void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(ds_y)) + if (thread->line_skipped_by_thread(_y)) return; - uint32_t *dest = ylookup[ds_y] + ds_x1 + (uint32_t*)dc_destorg; - int count = (ds_x2 - ds_x1 + 1); - uint32_t 
light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = (_x2 - _x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } @@ -2343,45 +2365,45 @@ public: class Vlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int vlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Vlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; vlinebits = ::vlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = 
vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2394,10 +2416,10 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int vlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2407,10 +2429,10 @@ class Vlinec4RGBACommand : public DrawerCommand public: Vlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; vlinebits = ::vlinebits; for (int i = 0; i < 4; i++) { @@ -2424,12 +2446,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = vlinebits; DWORD place; @@ -2438,11 +2460,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = 
thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2461,24 +2483,24 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; uint32_t light0 = calc_light_multiplier(palookuplight[0]); uint32_t light1 = calc_light_multiplier(palookuplight[1]); uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2543,45 +2565,45 @@ public: class Mvlinec1RGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int mvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; public: Mvlinec1RGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; mvlinebits = 
::mvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; do { @@ -2598,10 +2620,10 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; int mvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2611,10 +2633,10 @@ class Mvlinec4RGBACommand : public DrawerCommand public: Mvlinec4RGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; mvlinebits = ::mvlinebits; for (int i = 0; i < 4; i++) { @@ -2628,12 +2650,12 @@ public: #ifdef NO_SSE void Execute(DrawerThread *thread) 
override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; DWORD place; @@ -2642,11 +2664,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2666,12 +2688,12 @@ public: #else void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = mvlinebits; uint32_t light0 = calc_light_multiplier(palookuplight[0]); @@ -2679,11 +2701,11 @@ public: uint32_t light2 = calc_light_multiplier(palookuplight[2]); uint32_t light3 = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = 
thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2754,52 +2776,52 @@ public: class Tmvline1AddRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, 
_pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2829,12 +2851,12 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -2844,12 +2866,12 @@ class Tmvline4AddRGBACommand : public DrawerCommand public: Tmvline4AddRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -2862,12 +2884,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -2876,14 
+2898,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -2921,52 +2943,52 @@ public: class Tmvline1AddClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1AddClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = 
thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -2996,12 +3018,12 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3011,12 +3033,12 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand public: Tmvline4AddClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + 
_srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3029,12 +3051,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3043,14 +3065,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3088,52 +3110,52 @@ public: class Tmvline1SubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1SubClampRGBACommand() { - dc_iscale = ::dc_iscale; - 
dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3163,12 +3185,12 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t 
dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3178,12 +3200,12 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand public: Tmvline4SubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3196,12 +3218,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3210,14 +3232,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += 
local_vince[i] * skipped; @@ -3255,52 +3277,52 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerCommand { - DWORD dc_iscale; - DWORD dc_texturefrac; - int dc_count; - const BYTE *dc_source; - BYTE *dc_dest; + DWORD _iscale; + DWORD _texturefrac; + int _count; + const BYTE *_source; + BYTE *_dest; int tmvlinebits; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: Tmvline1RevSubClampRGBACommand() { - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_count = ::dc_count; - dc_source = ::dc_source; - dc_dest = ::dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _count = dc_count; + _source = dc_source; + _dest = dc_dest; tmvlinebits = ::tmvlinebits; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - DWORD fracstep = dc_iscale * thread->num_cores; - DWORD frac = dc_texturefrac + dc_iscale * thread->skipped_by_thread(dc_dest_y); - const uint32 *source = (const uint32 *)dc_source; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); + DWORD fracstep = _iscale * thread->num_cores; + DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); + const uint32 *source = (const uint32 *)_source; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = tmvlinebits; - int pitch = dc_pitch * thread->num_cores; + int pitch = _pitch * 
thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { @@ -3330,12 +3352,12 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *dc_dest; - int dc_count; - int dc_pitch; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; int tmvlinebits; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -3345,12 +3367,12 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand public: Tmvline4RevSubClampRGBACommand() { - dc_dest = ::dc_dest; - dc_count = ::dc_count; - dc_pitch = ::dc_pitch; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; tmvlinebits = ::tmvlinebits; for (int i = 0; i < 4; i++) { @@ -3363,12 +3385,12 @@ public: void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(dc_dest_y, dc_count); + int count = thread->count_for_thread(_dest_y, _count); if (count <= 0) return; - uint32_t *dest = thread->dest_for_thread(dc_dest_y, dc_pitch, (uint32_t*)dc_dest); - int pitch = dc_pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; int bits = tmvlinebits; uint32_t light[4]; @@ -3377,14 +3399,14 @@ public: light[2] = calc_light_multiplier(palookuplight[2]); light[3] = 
calc_light_multiplier(palookuplight[3]); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(dc_dest_y); + int skipped = thread->skipped_by_thread(_dest_y); for (int i = 0; i < 4; i++) { local_vplce[i] += local_vince[i] * skipped; @@ -3425,9 +3447,9 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *dc_destorg; - fixed_t dc_light; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; public: DrawFogBoundaryLineRGBACommand(int y, int x, int x2) @@ -3436,9 +3458,9 @@ public: _x = x; _x2 = x2; - dc_destorg = ::dc_destorg; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _light = dc_light; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -3450,10 +3472,10 @@ public: int x = _x; int x2 = _x2; - uint32_t *dest = ylookup[y] + (uint32_t*)dc_destorg; + uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants constants = _shade_constants; do { @@ -3492,6 +3514,157 @@ public: } }; +class DrawTiltedSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + ShadeConstants _shade_constants; + const BYTE *_source; + +public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _source = 
ds_source; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + // Slopes are broken currently in master. + // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + + uint32_t *source = (uint32_t*)_source; + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + + int count = x2 - x1 + 1; + while (count > 0) + { + *(dest++) = source[0]; + count--; + } + } +}; + +class DrawColoredSpanRGBACommand : public DrawerCommand +{ + int _y; + int _x1; + int _x2; + BYTE *_destorg; + fixed_t _light; + int _color; + +public: + DrawColoredSpanRGBACommand(int y, int x1, int x2) + { + _y = y; + _x1 = x1; + _x2 = x2; + + _destorg = dc_destorg; + _light = ds_light; + _color = ds_color; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + int y = _y; + int x1 = _x1; + int x2 = _x2; + + uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + int count = (x2 - x1 + 1); + uint32_t light = calc_light_multiplier(_light); + uint32_t color = shade_pal_index_simple(_color, light); + for (int i = 0; i < count; i++) + dest[i] = color; + } +}; + +class FillTransColumnRGBACommand : public DrawerCommand +{ + int _x; + int _y1; + int _y2; + int _color; + int _a; + BYTE *_destorg; + int _pitch; + fixed_t _light; + +public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + { + _x = x; + _y1 = y1; + _y2 = y2; + _color = color; + _a = a; + + _destorg = dc_destorg; + _pitch = dc_pitch; + } + + void Execute(DrawerThread *thread) override + { + int x = _x; + int y1 = _y1; + int y2 = _y2; + int color = _color; + int a = _a; + + int ycount = thread->count_for_thread(y1, y2 - y1 + 1); + if (ycount <= 0) + return; + + uint32_t fg = GPalette.BaseColors[color].d; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t alpha = a 
+ 1; + uint32_t inv_alpha = 256 - alpha; + + fg_red *= alpha; + fg_green *= alpha; + fg_blue *= alpha; + + int spacing = _pitch * thread->num_cores; + uint32_t *dest = thread->dest_for_thread(y1, _pitch, ylookup[y1] + x + (uint32_t*)_destorg); + + for (int y = 0; y < ycount; y++) + { + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += spacing; + } + } +}; + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) { buffer = screen->GetBuffer(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47ea75260..0ab3e298a 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -184,6 +184,8 @@ public: virtual void Execute(DrawerThread *thread) = 0; }; +EXTERN_CVAR(Bool, r_multithreaded) + // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { @@ -227,7 +229,7 @@ public: static void QueueCommand(Types &&... 
args) { auto queue = Instance(); - if (queue->threaded_render == 0) + if (queue->threaded_render == 0 || !r_multithreaded) { T command(std::forward(args)...); command.Execute(&queue->single_core_thread); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index d2d715c8d..2311cb447 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,8 +59,8 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; + BYTE *_destorg; + int _pitch; public: RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) @@ -70,8 +70,8 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; + _destorg = dc_destorg; + _pitch = dc_pitch; } void Execute(DrawerThread *thread) override @@ -85,9 +85,9 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; if (count & 1) { @@ -121,11 +121,11 @@ class RtMap1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *dc_colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -135,11 +135,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } void Execute(DrawerThread *thread) override @@ -154,15 +154,15 @@ 
public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { *dest = shade_pal_index(colormap[*source], light, shade_constants); @@ -186,11 +186,11 @@ class RtMap4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_destorg; - int dc_pitch; - BYTE *colormap; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_destorg; + int _pitch; + BYTE *_colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -199,11 +199,11 @@ public: this->yl = yl; this->yh = yh; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_colormap = ::dc_colormap; + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; } #ifdef NO_SSE @@ -219,15 +219,15 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 
4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (count & 1) { dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); @@ -266,16 +266,16 @@ public: if (count <= 0) return; - ShadeConstants shade_constants = dc_shade_constants; - uint32_t light = calc_light_multiplier(dc_light); + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = thread->num_cores * 4; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; if (shade_constants.simple_shade) { @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - BYTE *dc_colormap; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + BYTE *_colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) @@ -523,13 +523,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_colormap = ::dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _colormap = dc_colormap; } void 
Execute(DrawerThread *thread) override @@ -544,17 +544,17 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); @@ -583,13 +583,13 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - BYTE *dc_colormap; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE *_colormap; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAdd4colsRGBACommand(int sx, int yl, int yh) @@ -598,13 +598,13 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_colormap = ::dc_colormap; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } #ifdef NO_SSE @@ -620,17 +620,17 @@ public: if 
(count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; - BYTE *colormap = dc_colormap; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -668,19 +668,19 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = dc_colormap; + BYTE *colormap = _colormap; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ -764,11 +764,11 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t 
*dc_colormap; - BYTE *dc_destorg; - int dc_pitch; - int dc_color; - fixed_t dc_light; + lighttable_t *_colormap; + BYTE *_destorg; + int _pitch; + int _color; + fixed_t _light; public: RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) @@ -778,11 +778,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_color = ::dc_color; - dc_light = ::dc_light; + _colormap = dc_colormap; + _destorg = dc_destorg; + _pitch = dc_pitch; + _color = dc_color; + _light = dc_light; } void Execute(DrawerThread *thread) override @@ -798,13 +798,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -833,11 +833,11 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *dc_colormap; - int dc_color; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; + lighttable_t *_colormap; + int _color; + BYTE *_destorg; + int _pitch; + fixed_t _light; public: RtShaded4colsRGBACommand(int sx, int yl, int yh) @@ -846,11 +846,11 @@ public: this->yl = yl; this->yh = yh; - dc_colormap = ::dc_colormap; - dc_color = ::dc_color; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; } 
#ifdef NO_SSE @@ -867,13 +867,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light)); + uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -912,13 +912,13 @@ public: if (count <= 0) return; - colormap = dc_colormap; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(dc_color, calc_light_multiplier(dc_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -957,12 +957,12 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - ShadeConstants dc_shade_constants; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; + BYTE *_destorg; + int _pitch; + fixed_t _light; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; public: RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -972,12 +972,12 @@ public: 
this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_shade_constants = ::dc_shade_constants; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -992,16 +992,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1029,12 +1029,12 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtAddClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1043,12 +1043,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = 
::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } #ifdef NO_SSE @@ -1064,16 +1064,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1110,18 +1110,18 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); + uint32_t light = calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - ShadeConstants shade_constants = dc_shade_constants; + ShadeConstants shade_constants = _shade_constants; if (shade_constants.simple_shade) { @@ 
-1205,12 +1205,12 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) @@ -1220,12 +1220,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1240,16 +1240,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1277,12 +1277,12 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t 
dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1291,12 +1291,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1311,16 +1311,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1353,12 +1353,12 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp1colRGBACommand(int hx, int sx, int 
yl, int yh) @@ -1368,12 +1368,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1388,16 +1388,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t fg = shade_pal_index(*source, light, shade_constants); @@ -1425,12 +1425,12 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *dc_destorg; - int dc_pitch; - fixed_t dc_light; - fixed_t dc_srcalpha; - fixed_t dc_destalpha; - ShadeConstants dc_shade_constants; + BYTE *_destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; public: RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) @@ -1439,12 +1439,12 @@ public: this->yl = yl; this->yh = yh; - dc_destorg = ::dc_destorg; - dc_pitch = ::dc_pitch; - dc_light = ::dc_light; - dc_srcalpha = ::dc_srcalpha; - 
dc_destalpha = ::dc_destalpha; - dc_shade_constants = ::dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override @@ -1459,16 +1459,16 @@ public: if (count <= 0) return; - dest = thread->dest_for_thread(yl, dc_pitch, ylookup[yl] + sx + (uint32_t*)dc_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = dc_pitch * thread->num_cores; + pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(dc_light); - ShadeConstants shade_constants = dc_shade_constants; + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = dc_srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = dc_destalpha >> (FRACBITS - 8); + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { for (int i = 0; i < 4; i++) @@ -1513,29 +1513,29 @@ public: class DrawColumnHorizRGBACommand : public DrawerCommand { - int dc_count; - fixed_t dc_iscale; - fixed_t dc_texturefrac; - const BYTE *dc_source; - int dc_x; - int dc_yl; - int dc_yh; + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const BYTE *_source; + int _x; + int _yl; + int _yh; public: DrawColumnHorizRGBACommand() { - dc_count = ::dc_count; - dc_iscale = ::dc_iscale; - dc_texturefrac = ::dc_texturefrac; - dc_source = ::dc_source; - dc_x = ::dc_x; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _count = dc_count; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _source = dc_source; + _x = dc_x; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; + int count = _count; uint32_t *dest; fixed_t fracstep; fixed_t frac; @@ -1544,13 +1544,13 
@@ public: return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } - fracstep = dc_iscale; - frac = dc_texturefrac; + fracstep = _iscale; + frac = _texturefrac; - const BYTE *source = dc_source; + const BYTE *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -1587,34 +1587,34 @@ public: class FillColumnHorizRGBACommand : public DrawerCommand { - int dc_x; - int dc_yl; - int dc_yh; - int dc_count; - int dc_color; + int _x; + int _yl; + int _yh; + int _count; + int _color; public: FillColumnHorizRGBACommand() { - dc_x = ::dc_x; - dc_count = ::dc_count; - dc_color = ::dc_color; - dc_yl = ::dc_yl; - dc_yh = ::dc_yh; + _x = dc_x; + _count = dc_count; + _color = dc_color; + _yl = dc_yl; + _yh = dc_yh; } void Execute(DrawerThread *thread) override { - int count = dc_count; - int color = dc_color; + int count = _count; + int color = _color; uint32_t *dest; if (count <= 0) return; { - int x = dc_x & 3; - dest = &thread->dc_temp_rgba[x + 4 * dc_yl]; + int x = _x & 3; + dest = &thread->dc_temp_rgba[x + 4 * _yl]; } if (count & 1) { From 3089043b07c206db4a2d05cb27639378147d2851 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 08:28:30 +0200 Subject: [PATCH 049/100] Fixed typo --- src/r_draw_rgba.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0ab3e298a..15a76c689 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -173,12 +173,12 @@ public: class DrawerCommand { protected: - int dc_dest_y; + int _dest_y; public: DrawerCommand() { - dc_dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); + _dest_y = static_cast((dc_dest - dc_destorg) / (dc_pitch * 4)); } virtual void Execute(DrawerThread *thread) = 0; From 000008e04dfa50fa5443d005d8076511dba1ca46 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:14:33 +0200 Subject: 
[PATCH 050/100] Fixed empty canvas in kdizd intermission screen --- src/r_swrenderer.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c4347236d..c788dfd54 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -370,7 +370,30 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin FTexture::FlipNonSquareBlockRemap(Pixels, Canvas->GetBuffer(), tex->GetWidth(), tex->GetHeight(), Canvas->GetPitch(), GPalette.Remap); } } - tex->SetUpdated(); + + if (r_swtruecolor) + { + // True color render still sometimes uses palette textures (for sprites, mostly). + // We need to make sure that both pixel buffers contain data: + int width = tex->GetWidth(); + int height = tex->GetHeight(); + BYTE *palbuffer = (BYTE *)tex->GetPixels(); + uint32_t *bgrabuffer = (uint32_t*)tex->GetPixelsBgra(); + for (int x = 0; x < width; x++) + { + for (int y = 0; y < height; y++) + { + uint32_t color = bgrabuffer[y]; + int r = RPART(color); + int g = GPART(color); + int b = BPART(color); + palbuffer[y] = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + } + palbuffer += height; + bgrabuffer += height; + } + } + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 5963f29afd0906d7c4d1c99f116e78907c60dac4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:16:34 +0200 Subject: [PATCH 051/100] Added missing SetUpdated --- src/r_swrenderer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c788dfd54..c81d2a110 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -394,6 +394,8 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin } } + tex->SetUpdated(); + fixedcolormap = savecolormap; realfixedcolormap = savecm; } From 12a50c140c7656ad944df027bd9f5c332f48f698 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 10:47:30 +0200 Subject: [PATCH 
052/100] Fix animated textures not updating in swtruecolor mode --- src/g_strife/strife_sbar.cpp | 11 +++++++++++ src/menu/playerdisplay.cpp | 11 +++++++++++ src/textures/textures.h | 1 + src/textures/warptexture.cpp | 12 ++++++++++++ 4 files changed, 35 insertions(+) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index eb3fa2608..e1fcb3cda 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,6 +34,7 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -115,6 +116,16 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } +const uint32_t *FHealthBar::GetPixelsBgra() +{ + if (NeedRefresh) + { + MakeTexture(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + void FHealthBar::SetVial (int level) { if (level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 16671975a..7b7e9ca5d 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,6 +78,7 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); + const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -246,6 +247,16 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } +const uint32_t *FBackdropTexture::GetPixelsBgra() +{ + if (LastRenderTic != gametic) + { + Render(); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. 
diff --git a/src/textures/textures.h b/src/textures/textures.h index 38d1ef487..3b4b0b8b3 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -484,6 +484,7 @@ public: virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); + const uint32_t *GetPixelsBgra() override; void Unload (); bool CheckModified (); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index b6977dd77..0d18ab58f 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -93,6 +93,18 @@ const BYTE *FWarpTexture::GetPixels () return Pixels; } +const uint32_t *FWarpTexture::GetPixelsBgra() +{ + DWORD time = r_FrameTime; + + if (Pixels == NULL || time != GenTime) + { + MakeTexture(time); + PixelsBgra.clear(); + } + return FTexture::GetPixelsBgra(); +} + const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) { DWORD time = r_FrameTime; From 35c078dc1e0f4dc67d0ed3cd09f292e257fb1b9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 11:24:21 +0200 Subject: [PATCH 053/100] Screenshot fix --- src/v_video.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/v_video.cpp b/src/v_video.cpp index 2cf04a29d..bcd49f920 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -445,7 +445,7 @@ void DCanvas::GetScreenshotBuffer(const BYTE *&buffer, int &pitch, ESSType &colo { Lock(true); buffer = GetBuffer(); - pitch = GetPitch(); + pitch = IsBgra() ? GetPitch() * 4 : GetPitch(); color_type = IsBgra() ? 
SS_BGRA : SS_PAL; } From f53e468f3f32fcb544842e86be5c06a63b0c3f31 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 12:38:00 +0200 Subject: [PATCH 054/100] Fixed fill column rgba drawers --- src/r_draw.cpp | 11 +++-- src/r_draw.h | 1 + src/r_draw_rgba.cpp | 114 ++++++++++++++++++++++++++++---------------- 3 files changed, 82 insertions(+), 44 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 4dcdc3e6b..7829e2b77 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ fixed_t dc_iscale; fixed_t dc_texturefrac; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; +uint32_t dc_srccolor_bgra; DWORD *dc_srcblend; // [RH] Source and destination DWORD *dc_destblend; // blending lookups fixed_t dc_srcalpha; // Alpha value used by dc_srcblend @@ -2702,10 +2703,10 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, if (style.Flags & STYLEF_ColorIsFixed) { - int x = fglevel >> 10; - int r = RPART(color); - int g = GPART(color); - int b = BPART(color); + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. dc_color = RGB32k.RGB[r>>3][g>>3][b>>3]; if (style.Flags & STYLEF_InvertSource) @@ -2714,6 +2715,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, g = 255 - g; b = 255 - b; } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. 
dc_srccolor = ((((r*x)>>4)<<20) | ((g*x)>>4) | ((((b)*x)>>4)<<10)) & 0x3feffbff; diff --git a/src/r_draw.h b/src/r_draw.h index a31183405..99ee4d10d 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -63,6 +63,7 @@ extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; +extern "C" uint32_t dc_srccolor_bgra; extern "C" DWORD *dc_srcblend; extern "C" DWORD *dc_destblend; extern "C" fixed_t dc_srcalpha; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 722fbb8cd..491c6ab98 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -344,8 +344,7 @@ class FillAddColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; - int _color; + uint32_t _srccolor; public: FillAddColumnRGBACommand() @@ -353,8 +352,7 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; - _color = dc_color; + _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override @@ -369,10 +367,18 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = fg >> 24; + fg_alpha += fg_alpha >> 7; + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + uint32_t inv_alpha = 256 - fg_alpha; do { @@ -380,9 +386,9 @@ public: uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = (fg_red + bg_red + 1) / 2; - uint32_t green = (fg_green + bg_green + 1) / 2; - uint32_t blue = (fg_blue + bg_blue + 1) / 2; + uint32_t red = (fg_red + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green + bg_green * 
inv_alpha) / 256; + uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -395,8 +401,10 @@ class FillAddClampColumnRGBACommand : public DrawerCommand int _count; BYTE *_dest; int _pitch; - fixed_t _light; int _color; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillAddClampColumnRGBACommand() @@ -404,8 +412,10 @@ public: _count = dc_count; _dest = dc_dest; _pitch = dc_pitch; - _light = dc_light; _color = dc_color; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -420,20 +430,26 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { - do - { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(fg_red + bg_red, 0, 255); - uint32_t green = clamp(fg_green + bg_green, 0, 255); - uint32_t blue = clamp(fg_blue + bg_blue, 0, 255); + uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -447,7 +463,9 @@ class FillSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t _srccolor; + 
fixed_t _srcalpha; + fixed_t _destalpha; public: FillSubClampColumnRGBACommand() @@ -456,7 +474,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + _srccolor = dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -471,20 +491,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 - fg_red + bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 - fg_green + bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 - fg_blue + bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -498,7 +523,9 @@ class FillRevSubClampColumnRGBACommand : public DrawerCommand BYTE *_dest; int _pitch; int _color; - fixed_t _light; + uint32_t _srccolor; + fixed_t _srcalpha; + fixed_t _destalpha; public: FillRevSubClampColumnRGBACommand() @@ -507,7 +534,9 @@ public: _dest = dc_dest; _pitch = dc_pitch; _color = dc_color; - _light = dc_light; + _srccolor = 
dc_srccolor_bgra; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override @@ -522,20 +551,25 @@ public: dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 24) & 0xff; - uint32_t fg_green = (fg >> 16) & 0xff; + uint32_t fg = _srccolor; + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - do - { + fg_red *= fg_alpha; + fg_green *= fg_alpha; + fg_blue *= fg_alpha; + + do { uint32_t bg_red = (*dest >> 16) & 0xff; uint32_t bg_green = (*dest >> 8) & 0xff; uint32_t bg_blue = (*dest) & 0xff; - uint32_t red = clamp(256 + fg_red - bg_red, 256, 256 + 255) - 255; - uint32_t green = clamp(256 + fg_green - bg_green, 256, 256 + 255) - 255; - uint32_t blue = clamp(256 + fg_blue - bg_blue, 256, 256 + 255) - 255; + uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; From 822bbd5b9a015141d6ca17d19df8b8a8600f220b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 13:40:23 +0200 Subject: [PATCH 055/100] Fuzz (invisibility) adjustments --- src/r_draw_rgba.cpp | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 491c6ab98..bdbcd1250 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -584,8 +584,8 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _yh; BYTE *_destorg; int _pitch; - int fuzzpos; - int 
fuzzviewheight; + int _fuzzpos; + int _fuzzviewheight; public: DrawFuzzColumnRGBACommand() @@ -595,8 +595,8 @@ public: _yh = dc_yh; _destorg = dc_destorg; _pitch = dc_pitch; - fuzzpos = ::fuzzpos; - fuzzviewheight = ::fuzzviewheight; + _fuzzpos = fuzzpos; + _fuzzviewheight = fuzzviewheight; } void Execute(DrawerThread *thread) override @@ -609,8 +609,8 @@ public: _yl = 1; // .. and high. - if (_yh > fuzzviewheight) - _yh = fuzzviewheight; + if (_yh > _fuzzviewheight) + _yh = _fuzzviewheight; count = thread->count_for_thread(_yl, _yh - _yl + 1); @@ -622,7 +622,7 @@ public: int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; while (count > 0) { @@ -640,9 +640,9 @@ public: uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = bg_red * 7 / 8; + uint32_t green = bg_green * 7 / 8; + uint32_t blue = bg_blue * 7 / 8; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -3896,7 +3896,13 @@ void R_FillRevSubClampColumn_rgba() void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - fuzzpos = (fuzzpos + dc_yh - dc_yl) % FUZZTABLE; + + if (dc_yl == 0) + dc_yl = 1; + if (dc_yh > fuzzviewheight) + dc_yh = fuzzviewheight; + + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From 4ef2fb3cdb19d71a67c21c860f7d0bea96db8cb2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 17 Jun 2016 14:45:52 +0200 Subject: [PATCH 056/100] Fixed multithreaded rendering issue with the fuzz effect --- src/r_draw_rgba.cpp | 75 +++++++++++++++++++++++++++++++++------------ 1 file changed, 55 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bdbcd1250..b1ee1f02c 100644 --- a/src/r_draw_rgba.cpp 
+++ b/src/r_draw_rgba.cpp @@ -604,26 +604,48 @@ public: int count; uint32_t *dest; - // Adjust borders. Low... - if (_yl == 0) - _yl = 1; + int yl = MAX(_yl, 1); + int yh = MIN(_yh, _fuzzviewheight); - // .. and high. - if (_yh > _fuzzviewheight) - _yh = _fuzzviewheight; - - count = thread->count_for_thread(_yl, _yh - _yl + 1); + count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. if (count <= 0) return; - dest = thread->dest_for_thread(_yl, _pitch, ylookup[_yl] + _x + (uint32_t*)_destorg); + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; - int fuzz = (_fuzzpos + thread->skipped_by_thread(_yl)) % FUZZTABLE; + int fuzz = (_fuzzpos + thread->skipped_by_thread(yl)) % FUZZTABLE; + yl += thread->skipped_by_thread(yl); + + // Handle the case where we would go out of bounds at the top: + if (yl < fuzzstep) + { + count--; + + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fuzz += fuzzstep; + fuzz %= FUZZTABLE; + } + + bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + if (lowerbounds) + count--; + + // Fuzz where fuzzoffset stays within bounds while (count > 0) { int available = (FUZZTABLE - fuzz); @@ -635,14 +657,14 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz]]; + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 7 / 8; - uint32_t green = bg_green * 7 / 8; - uint32_t blue = bg_blue * 7 / 8; + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = 
bg_blue * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -651,6 +673,21 @@ public: fuzz %= FUZZTABLE; } + + // Handle the case where we would go out of bounds at the bottom + if (lowerbounds) + { + uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t bg_red = (bg >> 16) & 0xff; + uint32_t bg_green = (bg >> 8) & 0xff; + uint32_t bg_blue = (bg) & 0xff; + + uint32_t red = bg_red * 3 / 4; + uint32_t green = bg_green * 3 / 4; + uint32_t blue = bg_blue * 3 / 4; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + } } }; @@ -3897,12 +3934,10 @@ void R_DrawFuzzColumn_rgba() { DrawerCommandQueue::QueueCommand(); - if (dc_yl == 0) - dc_yl = 1; - if (dc_yh > fuzzviewheight) - dc_yh = fuzzviewheight; - - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; } void R_DrawAddColumn_rgba() From e7cdcd9c0af6d82c8acc7c14102fbc8c4e34da1e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 05:20:34 +0200 Subject: [PATCH 057/100] Change to one pass rendering to remove fuzz artifact --- src/r_draw_rgba.cpp | 6 ++++-- src/r_draw_rgba.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b1ee1f02c..6021c9265 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -624,8 +624,6 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - count--; - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; uint32_t bg_red = (bg >> 16) & 0xff; uint32_t bg_green = (bg >> 8) & 0xff; @@ -639,6 +637,10 @@ public: dest += pitch; fuzz += fuzzstep; fuzz %= FUZZTABLE; + + count--; + if (count == 0) + return; } bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 15a76c689..83977d65c 100644 --- a/src/r_draw_rgba.h +++ 
b/src/r_draw_rgba.h @@ -209,8 +209,8 @@ class DrawerCommandQueue int threaded_render = 0; DrawerThread single_core_thread; - int num_passes = 2; - int rows_in_pass = 540; + int num_passes = 1; + int rows_in_pass = MAXHEIGHT; void StartThreads(); void StopThreads(); From 3e7eb79729049302bf80e79908af8a8006c3841c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 18 Jun 2016 11:17:59 +0200 Subject: [PATCH 058/100] Added some experimental AVX2 drawers --- src/r_draw_rgba.cpp | 420 ++++++++++++++++++++++++++++++++++--------- src/r_draw_rgba.h | 91 ++++++++++ src/r_drawt_rgba.cpp | 46 ++--- 3 files changed, 454 insertions(+), 103 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 6021c9265..271250855 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -44,9 +44,14 @@ #include "x86.h" #ifndef NO_SSE #include +#include #endif #include +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -58,6 +63,8 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) + ///////////////////////////////////////////////////////////////////////////// DrawerCommandQueue *DrawerCommandQueue::Instance() @@ -230,14 +237,14 @@ void DrawerCommandQueue::StopThreads() class DrawColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _texturefrac; DWORD _iscale; fixed_t _light; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawColumnRGBACommand() @@ -297,7 +304,7 @@ public: class FillColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; fixed_t _light; int _pitch; int _color; @@ -342,7 +349,7 @@ public: class 
FillAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; uint32_t _srccolor; @@ -399,7 +406,7 @@ public: class FillAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -460,7 +467,7 @@ public: class FillSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -520,7 +527,7 @@ public: class FillRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; int _pitch; int _color; uint32_t _srccolor; @@ -582,7 +589,7 @@ class DrawFuzzColumnRGBACommand : public DrawerCommand int _x; int _yl; int _yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; int _fuzzpos; int _fuzzviewheight; @@ -696,16 +703,16 @@ public: class DrawAddColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: DrawAddColumnRGBACommand() @@ -779,11 +786,11 @@ class DrawTranslatedColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; public: @@ -840,11 +847,11 @@ class DrawTlatedAddColumnRGBACommand : public DrawerCommand int _count; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; fixed_t 
_srcalpha; fixed_t _destalpha; @@ -920,12 +927,12 @@ class DrawShadedColumnRGBACommand : public DrawerCommand { private: int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; fixed_t _light; - const BYTE *_source; - lighttable_t *_colormap; + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; int _color; int _pitch; @@ -993,10 +1000,10 @@ public: class DrawAddClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1069,11 +1076,11 @@ public: class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - BYTE *_translation; - const BYTE *_source; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1148,10 +1155,10 @@ public: class DrawSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1224,16 +1231,16 @@ public: class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawSubClampTranslatedColumnRGBACommand() @@ -1303,10 +1310,10 @@ public: class DrawRevSubClampColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * 
RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1378,16 +1385,16 @@ public: class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand { int _count; - BYTE *_dest; + BYTE * RESTRICT _dest; DWORD _iscale; DWORD _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_translation; + BYTE * RESTRICT _translation; public: DrawRevSubClampTranslatedColumnRGBACommand() @@ -1422,8 +1429,8 @@ public: frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); { - BYTE *translation = _translation; - const BYTE *source = _source; + BYTE * RESTRICT translation = _translation; + const BYTE * RESTRICT source = _source; int pitch = _pitch * thread->num_cores; uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; @@ -1456,7 +1463,7 @@ public: class DrawSpanRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1466,7 +1473,7 @@ class DrawSpanRGBACommand : public DrawerCommand int _y; int _xbits; int _ybits; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -1539,6 +1546,181 @@ public: BYTE xshift = yshift - _xbits; int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. 
+ xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu_si256((__m256i*)dest, fg); + + // Next step in u,v. 
+ dest += 8; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 8; + count -= sse_count * 8; + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 8; + } + } + else + { + AVX2_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + uint32_t fg_pixels[8]; + for (int i = 0; i < 8; i++) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + fg_pixels[i] = source[spot]; + xfrac += xstep; + yfrac += ystep; + } + + // Lookup pixel from flat texture tile + __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu_si256((__m256i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + do { // Current texture index in u,v. 
@@ -1777,12 +1959,12 @@ public: class DrawSpanMaskedRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -1880,12 +2062,12 @@ public: class DrawSpanTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2006,12 +2188,12 @@ public: class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2142,12 +2324,12 @@ public: class DrawSpanAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2268,12 +2450,12 @@ public: class DrawSpanMaskedAddClampRGBACommand : public DrawerCommand { - const uint32_t *_source; + const uint32_t * RESTRICT _source; fixed_t _light; ShadeConstants _shade_constants; fixed_t _xfrac; fixed_t _yfrac; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _x1; int _x2; int _y1; @@ -2407,7 +2589,7 @@ class FillSpanRGBACommand : public DrawerCommand int _x1; int _x2; int _y; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -2441,8 +2623,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int vlinebits; int _pitch; fixed_t _light; @@ -2489,7 
+2671,7 @@ public: class Vlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2497,7 +2679,7 @@ class Vlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Vlinec4RGBACommand() @@ -2553,6 +2735,84 @@ public: dest += pitch; } while (--count); } +#elif defined(USE_AVX) + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (count & 1) + { + DWORD place; + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } + count /= 2; + + 
// Assume all columns come from the same texture (which they do): + const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); + __m256i column_offsets = _mm256_set_epi32( + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, + bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); + + __m256i place = _mm256_set_epi32( + local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], + local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); + + __m256i step = _mm256_set_epi32( + local_vince[3], local_vince[2], local_vince[1], local_vince[0], + local_vince[3], local_vince[2], local_vince[1], local_vince[0]); + step = _mm256_add_epi32(step, step); + + if (shade_constants.simple_shade) + { + AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE_SIMPLE(fg); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + else + { + AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + while (count--) + { + __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); + place = _mm256_add_epi32(place, step); + AVX2_SHADE(fg, shade_constants); + _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); + dest += pitch * 2; + } + } + } #else void Execute(DrawerThread *thread) override { @@ -2641,8 +2901,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int mvlinebits; int _pitch; fixed_t _light; @@ 
-2693,7 +2953,7 @@ public: class Mvlinec4RGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2701,7 +2961,7 @@ class Mvlinec4RGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Mvlinec4RGBACommand() @@ -2852,8 +3112,8 @@ class Tmvline1AddRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -2924,7 +3184,7 @@ public: class Tmvline4AddRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -2934,7 +3194,7 @@ class Tmvline4AddRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 * RESTRICT bufplce[4]; public: Tmvline4AddRGBACommand() @@ -3019,8 +3279,8 @@ class Tmvline1AddClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3091,7 +3351,7 @@ public: class Tmvline4AddClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3101,7 +3361,7 @@ class Tmvline4AddClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4AddClampRGBACommand() @@ -3186,8 +3446,8 @@ class Tmvline1SubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int 
tmvlinebits; int _pitch; fixed_t _light; @@ -3258,7 +3518,7 @@ public: class Tmvline4SubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3268,7 +3528,7 @@ class Tmvline4SubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4SubClampRGBACommand() @@ -3353,8 +3613,8 @@ class Tmvline1RevSubClampRGBACommand : public DrawerCommand DWORD _iscale; DWORD _texturefrac; int _count; - const BYTE *_source; - BYTE *_dest; + const BYTE * RESTRICT _source; + BYTE * RESTRICT _dest; int tmvlinebits; int _pitch; fixed_t _light; @@ -3425,7 +3685,7 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerCommand { - BYTE *_dest; + BYTE * RESTRICT _dest; int _count; int _pitch; ShadeConstants _shade_constants; @@ -3435,7 +3695,7 @@ class Tmvline4RevSubClampRGBACommand : public DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 *bufplce[4]; + const uint32 *RESTRICT bufplce[4]; public: Tmvline4RevSubClampRGBACommand() @@ -3520,7 +3780,7 @@ class DrawFogBoundaryLineRGBACommand : public DrawerCommand int _y; int _x; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; @@ -3592,10 +3852,10 @@ class DrawTiltedSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE *_source; + const BYTE * RESTRICT _source; public: DrawTiltedSpanRGBACommand(int y, int x1, int x2) @@ -3637,7 +3897,7 @@ class DrawColoredSpanRGBACommand : public DrawerCommand int _y; int _x1; int _x2; - BYTE *_destorg; + BYTE * RESTRICT _destorg; fixed_t _light; int _color; @@ -3678,7 +3938,7 @@ class FillTransColumnRGBACommand : public DrawerCommand int _y2; int _color; int _a; - BYTE *_destorg; + BYTE * 
RESTRICT _destorg; int _pitch; fixed_t _light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 83977d65c..8f051b4cb 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -290,6 +290,17 @@ public: #endif #endif +// Promise compiler we have no aliasing of this pointer +#ifndef RESTRICT +#if defined(_MSC_VER) +#define RESTRICT __restrict +#elif defined(__GNUC__) +#define RESTRICT __restrict__ +#else +#define RESTRICT +#endif +#endif + // calculates the light constant passed to the shade_pal_index function FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) { @@ -413,6 +424,86 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +// Calculate constants for a simple shade +#define AVX2_SHADE_SIMPLE_INIT(light) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); + +// Calculate constants for a simple shade with different light levels for each pixel +#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); + +// Simple shade 8 pixels +#define AVX2_SHADE_SIMPLE(fg) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ + fg_hi = _mm256_srli_epi16(fg_hi, 8); \ + fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ + fg_lo = _mm256_srli_epi16(fg_lo, 8); \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + +// Calculate constants for a complex shade +#define AVX2_SHADE_INIT(light, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, 
shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Calculate constants for a complex shade with different light levels for each pixel +#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ + __m256i color = _mm256_set_epi16( \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, 
shade_constants.light_blue); \ + __m256i fade = _mm256_set_epi16( \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ + __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ + __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); + +// Complex shade 8 pixels +#define AVX2_SHADE(fg, shade_constants) { \ + __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ + __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ + \ + __m256i intensity_hi = _mm256_mullo_epi16(fg_hi, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity_lo = _mm256_mullo_epi16(fg_lo, _mm256_set_epi16(0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37, 0, 77, 143, 37)); \ + __m256i intensity = _mm256_mullo_epi16(_mm256_srli_epi16(_mm256_hadd_epi16(_mm256_hadd_epi16(intensity_lo, intensity_hi), _mm256_setzero_si256()), 8), desaturate); \ + intensity = _mm256_unpacklo_epi16(intensity, intensity); \ + intensity_hi = _mm256_unpackhi_epi32(intensity, intensity); \ + intensity_lo = _mm256_unpacklo_epi32(intensity, intensity); \ + \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, inv_desaturate), intensity_hi), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_hi, mlight), fade_amount), 8); \ + fg_hi = _mm256_srli_epi16(_mm256_mullo_epi16(fg_hi, color), 8); \ + \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, inv_desaturate), 
intensity_lo), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_adds_epu16(_mm256_mullo_epi16(fg_lo, mlight), fade_amount), 8); \ + fg_lo = _mm256_srli_epi16(_mm256_mullo_epi16(fg_lo, color), 8); \ + \ + fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +} + + + + + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 2311cb447..269dd9d9d 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -59,7 +59,7 @@ class RtCopy1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; public: @@ -123,9 +123,9 @@ class RtMap1colRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap1colRGBACommand(int hx, int sx, int yl, int yh) @@ -188,9 +188,9 @@ class RtMap4colsRGBACommand : public DrawerCommand int yh; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtMap4colsRGBACommand(int sx, int yl, int yh) @@ -383,7 +383,7 @@ public: class RtTranslate1colRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int hx; int yl; int yh; @@ -447,7 +447,7 @@ public: class RtTranslate4colsRGBACommand : public DrawerCommand { - const BYTE *translation; + const BYTE * RESTRICT translation; int yl; int yh; @@ -507,13 +507,13 @@ class RtAdd1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - BYTE *_colormap; + BYTE * RESTRICT _colormap; public: RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) 
@@ -583,11 +583,11 @@ class RtAdd4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; - BYTE *_colormap; + BYTE * RESTRICT _colormap; fixed_t _srcalpha; fixed_t _destalpha; @@ -764,8 +764,8 @@ class RtShaded1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; - BYTE *_destorg; + lighttable_t * RESTRICT _colormap; + BYTE * RESTRICT _destorg; int _pitch; int _color; fixed_t _light; @@ -833,9 +833,9 @@ class RtShaded4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - lighttable_t *_colormap; + lighttable_t * RESTRICT _colormap; int _color; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; @@ -957,7 +957,7 @@ class RtAddClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; ShadeConstants _shade_constants; @@ -1029,7 +1029,7 @@ class RtAddClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1205,7 +1205,7 @@ class RtSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1277,7 +1277,7 @@ class RtSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1353,7 +1353,7 @@ class RtRevSubClamp1colRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1425,7 +1425,7 @@ class RtRevSubClamp4colsRGBACommand : public DrawerCommand int sx; int yl; int yh; - BYTE *_destorg; + BYTE * RESTRICT _destorg; int _pitch; fixed_t _light; fixed_t _srcalpha; @@ -1497,7 +1497,7 @@ 
public: class RtInitColsRGBACommand : public DrawerCommand { - BYTE *buff; + BYTE * RESTRICT buff; public: RtInitColsRGBACommand(BYTE *buff) @@ -1516,7 +1516,7 @@ class DrawColumnHorizRGBACommand : public DrawerCommand int _count; fixed_t _iscale; fixed_t _texturefrac; - const BYTE *_source; + const BYTE * RESTRICT _source; int _x; int _yl; int _yh; From 3f905197d09e224db664264c9c8534985ca4c7df Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 07:40:01 +0200 Subject: [PATCH 059/100] Moved vectorized drawers to their own files --- src/r_draw_rgba.cpp | 701 +++-------------------------------------- src/r_draw_rgba.h | 161 +++++++--- src/r_draw_rgba_sse.h | 491 +++++++++++++++++++++++++++++ src/r_drawt_rgba.cpp | 443 ++++---------------------- src/r_drawt_rgba_sse.h | 495 +++++++++++++++++++++++++++++ 5 files changed, 1212 insertions(+), 1079 deletions(-) create mode 100644 src/r_draw_rgba_sse.h create mode 100644 src/r_drawt_rgba_sse.h diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 271250855..28c5df2ac 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,10 +48,6 @@ #endif #include -#ifdef _MSC_VER -#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX -#endif - extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; @@ -62,8 +58,38 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_linearlight, false, 0) -//#define USE_AVX // Use AVX2 256 bit intrinsics (requires Haswell or newer) +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_draw_rgba_sse.h" + +// Generate AVX drawers: +#undef 
VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_draw_rgba_sse.h" + +#endif ///////////////////////////////////////////////////////////////////////////// @@ -1495,7 +1521,6 @@ public: _shade_constants = ds_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { if (thread->line_skipped_by_thread(_y)) @@ -1560,401 +1585,6 @@ public: } while (--count); } } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. 
- __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu_si256((__m256i*)dest, fg); - - // Next step in u,v. - dest += 8; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 8; - count -= sse_count * 8; - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 8; - } - } - else - { - AVX2_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - uint32_t fg_pixels[8]; - for (int i = 0; i < 8; i++) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - fg_pixels[i] = source[spot]; - xfrac += xstep; - yfrac += ystep; - } - - // Lookup pixel from flat texture tile - __m256i fg = _mm256_loadu_si256((const __m256i*)fg_pixels); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu_si256((__m256i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#else - void Execute(DrawerThread *thread) override - { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - // Next step in u,v. - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); - } - else - { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - while (sse_count--) - { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; - } - } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); - } - } -#endif }; class DrawSpanMaskedRGBACommand : public DrawerCommand @@ -2698,7 +2328,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -2735,165 +2364,6 @@ public: dest += pitch; } while (--count); } -#elif defined(USE_AVX) - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (count & 1) - { - DWORD place; - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); 
local_vplce[3] = place + local_vince[3]; - dest += pitch; - } - count /= 2; - - // Assume all columns come from the same texture (which they do): - const uint32_t *base_addr = MIN(MIN(MIN(bufplce[0], bufplce[1]), bufplce[2]), bufplce[3]); - __m256i column_offsets = _mm256_set_epi32( - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr, - bufplce[3] - base_addr, bufplce[2] - base_addr, bufplce[1] - base_addr, bufplce[0] - base_addr); - - __m256i place = _mm256_set_epi32( - local_vplce[3] + local_vince[3], local_vplce[2] + local_vince[2], local_vplce[1] + local_vince[1], local_vplce[0] + local_vince[0], - local_vplce[3], local_vplce[2], local_vplce[1], local_vplce[0]); - - __m256i step = _mm256_set_epi32( - local_vince[3], local_vince[2], local_vince[1], local_vince[0], - local_vince[3], local_vince[2], local_vince[1], local_vince[0]); - step = _mm256_add_epi32(step, step); - - if (shade_constants.simple_shade) - { - AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srli_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE_SIMPLE(fg); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - else - { - AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - while (count--) - { - __m256i fg = _mm256_i32gather_epi32((const int *)base_addr, _mm256_add_epi32(column_offsets, _mm256_srai_epi32(place, bits)), 4); - place = _mm256_add_epi32(place, step); - AVX2_SHADE(fg, shade_constants); - _mm256_storeu2_m128i((__m128i*)(dest + pitch), (__m128i*)dest, fg); - dest += pitch * 2; - } - } - } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = 
vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - 
__m128i fg = _mm_set_epi32(p3, p2, p1, p0); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } -#endif }; class Mvlinec1RGBACommand : public DrawerCommand @@ -2980,7 +2450,6 @@ public: } } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { int count = thread->count_for_thread(_dest_y, _count); @@ -3018,93 +2487,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = 
place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; - - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - SSE_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); - dest += pitch; - } while (--count); - } - } -#endif }; class Tmvline1AddRGBACommand : public DrawerCommand @@ -4254,7 +3636,14 @@ void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawSpan_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif } void R_DrawSpanMasked_rgba() @@ -4304,7 +3693,14 @@ DWORD vlinec1_rgba() void vlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -4317,7 +3713,14 @@ DWORD 
mvlinec1_rgba() void mvlinec4_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f051b4cb..174478162 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -424,59 +424,124 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -// Calculate constants for a simple shade -#define AVX2_SHADE_SIMPLE_INIT(light) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); +// Calculate constants for a simple shade with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Calculate constants for a simple shade with different light levels for each pixel -#define AVX2_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); +// Calculate constants for a simple shade with different light levels for each pixel and gamma correction +#define AVX_LINEAR_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 
* (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); -// Simple shade 8 pixels -#define AVX2_SHADE_SIMPLE(fg) { \ - __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ - __m256i fg_lo = _mm256_unpacklo_epi8(fg, _mm256_setzero_si256()); \ - fg_hi = _mm256_mullo_epi16(fg_hi, mlight); \ - fg_hi = _mm256_srli_epi16(fg_hi, 8); \ - fg_lo = _mm256_mullo_epi16(fg_lo, mlight); \ - fg_lo = _mm256_srli_epi16(fg_lo, 8); \ - fg = _mm256_packus_epi16(fg_lo, fg_hi); \ +// Simple shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE_SIMPLE(fg) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, mlight_hi); \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, mlight_lo); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ } -// Calculate constants for a complex shade -#define AVX2_SHADE_INIT(light, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light, light, light, 256, light, light, light, 256, light, light, light, 256, light, light, light); \ - __m256i color = 
_mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with gamma correction +#define AVX_LINEAR_SHADE_INIT(light, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + __m256 mlight_lo = mlight_hi; \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); -// Calculate constants for a complex shade with different light levels for each pixel -#define AVX2_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m256i mlight = _mm256_set_epi16(256, light3, light3, light3, 256, light2, light2, light2, 256, light1, light1, light1, 256, light0, light0, light0); \ - __m256i color = _mm256_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m256i fade = _mm256_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, 
shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m256i fade_amount = _mm256_mullo_epi16(fade, _mm256_subs_epu16(_mm256_set1_epi16(256), mlight)); \ - __m256i desaturate = _mm256_set1_epi16(shade_constants.desaturate); \ - __m256i inv_desaturate = _mm256_set1_epi16(256 - shade_constants.desaturate); +// Calculate constants for a complex shade with different light levels for each pixel and gamma correction +#define AVX_LINEAR_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ + __m256 mlight_hi = _mm256_set_ps(1.0f, light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), light1 * (1.0f/256.0f), 1.0f, light0 * (1.0f/256.0f), light0 * (1.0f/256.0f), light0 * (1.0f/256.0f)); \ + __m256 mlight_lo = _mm256_set_ps(1.0f, light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), light3 * (1.0f/256.0f), 1.0f, light2 * (1.0f/256.0f), light2 * (1.0f/256.0f), light2 * (1.0f/256.0f)); \ + mlight_hi = _mm256_mul_ps(mlight_hi, mlight_hi); \ + mlight_lo = _mm256_mul_ps(mlight_lo, mlight_lo); \ + __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ + __m256 m255 = _mm256_set1_ps(255.0f); \ + __m256 color = _mm256_set_ps( \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + __m256 fade = _mm256_set_ps( \ + shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + shade_constants.fade_alpha * (1.0f/256.0f), 
shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ + __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ + __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ + __m128 ss_desaturate = _mm_set_ss(shade_constants.desaturate * (1.0f/256.0f)); \ + __m128 intensity_weight = _mm_set_ps(0.0f, 77.0f/256.0f, 143.0f/256.0f, 37.0f/256.0f); +// Complex shade 4 pixels with gamma correction +#define AVX_LINEAR_SHADE(fg, shade_constants) { \ + __m256i fg_16 = _mm256_set_m128i(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _mm_unpacklo_epi8(fg, _mm_setzero_si128())); \ + __m256 fg_hi = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(fg_16, _mm256_setzero_si256())); \ + __m256 fg_lo = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(fg_16, _mm256_setzero_si256())); \ + fg_hi = _mm256_mul_ps(fg_hi, mrcp_255); \ + fg_hi = _mm256_mul_ps(fg_hi, fg_hi); \ + fg_lo = _mm256_mul_ps(fg_lo, mrcp_255); \ + fg_lo = _mm256_mul_ps(fg_lo, fg_lo); \ + \ + __m128 intensity_hi0 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 0), intensity_weight); \ + __m128 intensity_hi1 = _mm_mul_ps(_mm256_extractf128_ps(fg_hi, 1), intensity_weight); \ + intensity_hi0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi0, _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi0 = _mm_shuffle_ps(intensity_hi0, intensity_hi0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_hi1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_hi1, _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_hi1 = _mm_shuffle_ps(intensity_hi1, intensity_hi1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 
intensity_hi = _mm256_set_m128(intensity_hi1, intensity_hi0); \ + \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, inv_desaturate), intensity_hi); \ + fg_hi = _mm256_add_ps(_mm256_mul_ps(fg_hi, mlight_hi), fade_amount_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, color); \ + \ + __m128 intensity_lo0 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 0), intensity_weight); \ + __m128 intensity_lo1 = _mm_mul_ps(_mm256_extractf128_ps(fg_lo, 1), intensity_weight); \ + intensity_lo0 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo0, _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo0 = _mm_shuffle_ps(intensity_lo0, intensity_lo0, _MM_SHUFFLE(0,0,0,0)); \ + intensity_lo1 = _mm_mul_ss(_mm_add_ss(_mm_add_ss(intensity_lo1, _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(1,1,1,1))), _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(2,2,2,2))), ss_desaturate); \ + intensity_lo1 = _mm_shuffle_ps(intensity_lo1, intensity_lo1, _MM_SHUFFLE(0,0,0,0)); \ + __m256 intensity_lo = _mm256_set_m128(intensity_lo1, intensity_lo0); \ + \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, inv_desaturate), intensity_lo); \ + fg_lo = _mm256_add_ps(_mm256_mul_ps(fg_lo, mlight_lo), fade_amount_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, color); \ + \ + fg_hi = _mm256_sqrt_ps(fg_hi); \ + fg_hi = _mm256_mul_ps(fg_hi, m255); \ + fg_lo = _mm256_sqrt_ps(fg_lo); \ + fg_lo = _mm256_mul_ps(fg_lo, m255); \ + fg_16 = _mm256_packus_epi32(_mm256_cvtps_epi32(fg_lo), _mm256_cvtps_epi32(fg_hi)); \ + fg = _mm_packus_epi16(_mm256_extractf128_si256(fg_16, 0), _mm256_extractf128_si256(fg_16, 1)); \ +} + +/* // Complex shade 8 pixels #define AVX2_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ @@ -499,7 +564,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) \ fg = _mm256_packus_epi16(fg_lo, fg_hi); \ } - +*/ diff --git a/src/r_draw_rgba_sse.h 
b/src/r_draw_rgba_sse.h new file mode 100644 index 000000000..14ebbbb41 --- /dev/null +++ b/src/r_draw_rgba_sse.h @@ -0,0 +1,491 @@ +// +// SSE/AVX intrinsics based drawers for the r_draw family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAW_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_draw_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used. + +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(DrawSpanRGBA) : public DrawerCommand +{ + const uint32_t * RESTRICT _source; + fixed_t _xfrac; + fixed_t _yfrac; + fixed_t _xstep; + fixed_t _ystep; + int _x1; + int _x2; + int _y; + int _xbits; + int _ybits; + BYTE * RESTRICT _destorg; + fixed_t _light; + ShadeConstants _shade_constants; + +public: + VecCommand(DrawSpanRGBA)() + { + _source = (const uint32_t*)ds_source; + _xfrac = ds_xfrac; + _yfrac = ds_yfrac; + _xstep = ds_xstep; + _ystep = ds_ystep; + _x1 = ds_x1; + _x2 = ds_x2; + _y = ds_y; + _xbits = ds_xbits; + _ybits = ds_ybits; + _destorg = dc_destorg; + _light = ds_light; + _shade_constants = ds_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + if (thread->line_skipped_by_thread(_y)) + return; + + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + uint32_t* dest; + const uint32_t* source = _source; + int count; + int spot; + + xfrac = _xfrac; + yfrac = _yfrac; + + dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + + count = _x2 - _x1 + 1; + + xstep = _xstep; + ystep = _ystep; + + uint32_t light = calc_light_multiplier(_light); + ShadeConstants shade_constants = _shade_constants; + + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) 
+ return; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + } +}; + +class VecCommand(Vlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int vlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Vlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + vlinebits = ::vlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int bits = vlinebits; + int pitch = _pitch * thread->num_cores; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = 
local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Mvlinec4RGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + int mvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Mvlinec4RGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + mvlinebits = ::mvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) 
override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = mvlinebits; + + uint32_t light0 = calc_light_multiplier(palookuplight[0]); + uint32_t light1 = calc_light_multiplier(palookuplight[1]); + uint32_t light2 = calc_light_multiplier(palookuplight[2]); + uint32_t light3 = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE_SIMPLE(fg); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + 
DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + // movemask = !(pix == 0) + __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_SHADE(fg, shade_constants); + _mm_maskmoveu_si128(fg, movemask, (char*)dest); + dest += pitch; + } while (--count); + } + } +}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 269dd9d9d..4da963430 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,6 +51,39 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; +EXTERN_CVAR(Bool, r_linearlight) + +#ifndef NO_SSE + +// Generate SSE drawers: +#define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE +#define VEC_SHADE_INIT SSE_SHADE_INIT +#define VEC_SHADE_INIT4 SSE_SHADE_INIT4 +#define VEC_SHADE SSE_SHADE +#include "r_drawt_rgba_sse.h" + +// Generate AVX drawers: +#undef VecCommand +#undef VEC_SHADE_SIMPLE_INIT +#undef VEC_SHADE_SIMPLE_INIT4 +#undef VEC_SHADE_SIMPLE +#undef VEC_SHADE_INIT +#undef VEC_SHADE_INIT4 +#undef VEC_SHADE +#define VecCommand(name) name##_AVX_Command +#define VEC_SHADE_SIMPLE_INIT AVX_LINEAR_SHADE_SIMPLE_INIT +#define VEC_SHADE_SIMPLE_INIT4 AVX_LINEAR_SHADE_SIMPLE_INIT4 +#define VEC_SHADE_SIMPLE AVX_LINEAR_SHADE_SIMPLE +#define VEC_SHADE_INIT AVX_LINEAR_SHADE_INIT +#define 
VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 +#define VEC_SHADE AVX_LINEAR_SHADE +#include "r_drawt_rgba_sse.h" + +#endif + ///////////////////////////////////////////////////////////////////////////// class RtCopy1colRGBACommand : public DrawerCommand @@ -206,7 +239,6 @@ public: _colormap = dc_colormap; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -253,132 +285,6 @@ public: dest += pitch * 2; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = 
colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - if (count & 1) { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - // shade_pal_index 0-3 - { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - } - - // shade_pal_index 4-7 (pitch) - { - uint32_t p0 = colormap[source[sincr]]; - uint32_t p1 = colormap[source[sincr + 1]]; - uint32_t p2 = colormap[source[sincr + 2]]; - uint32_t p3 = colormap[source[sincr + 3]]; - - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)(dest + pitch), fg); - } - - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } - } -#endif }; class RtTranslate1colRGBACommand : public DrawerCommand @@ -607,7 +513,6 @@ public: _destalpha = dc_destalpha; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -655,107 +560,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) 
override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - 
_mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtShaded1colRGBACommand : public DrawerCommand @@ -853,7 +657,6 @@ public: _light = dc_light; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { BYTE *colormap; @@ -898,57 +701,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); - __m128i alpha_one = _mm_set1_epi16(64); - - do { - uint32_t p0 = colormap[source[0]]; - uint32_t p1 = colormap[source[1]]; - uint32_t p2 = colormap[source[2]]; - uint32_t p3 = colormap[source[3]]; - - __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); - __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); - __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); - __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * alpha + bg_red * inv_alpha) / 64: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } -#endif }; class RtAddClamp1colRGBACommand : public DrawerCommand @@ -1051,7 +803,6 @@ public: _shade_constants = dc_shade_constants; } -#ifdef NO_SSE void Execute(DrawerThread *thread) override { uint32_t *source; @@ -1097,106 +848,6 @@ public: dest += pitch; } while (--count); } -#else - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = 
thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - uint32_t *palette = (uint32_t*)GPalette.BaseColors; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - ShadeConstants shade_constants = _shade_constants; - - if (shade_constants.simple_shade) - { - SSE_SHADE_SIMPLE_INIT(light); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE_SIMPLE(fg); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - else - { - SSE_SHADE_INIT(light, shade_constants); - - __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); - __m128i 
mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); - - do { - uint32_t p0 = source[0]; - uint32_t p1 = source[1]; - uint32_t p2 = source[2]; - uint32_t p3 = source[3]; - - // shade_pal_index: - __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); - SSE_SHADE(fg, shade_constants); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - // unpack bg: - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: - __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); - __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); - - __m128i color = _mm_packus_epi16(color_lo, color_hi); - _mm_storeu_si128((__m128i*)dest, color); - - source += sincr; - dest += pitch; - } while (--count); - } - } -#endif }; class RtSubClamp1colRGBACommand : public DrawerCommand @@ -1657,7 +1308,14 @@ void rt_map1col_rgba (int hx, int sx, int yl, int yh) // Maps all four spans to the screen starting at sx. void rt_map4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) @@ -1693,7 +1351,14 @@ void rt_add1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx without clamping. 
void rt_add4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx without clamping. @@ -1719,7 +1384,14 @@ void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) // Shades all four spans to the screen starting at sx. void rt_shaded4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Adds one span at hx to the screen at sx with clamping. @@ -1731,7 +1403,14 @@ void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and adds one span at hx to the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h new file mode 100644 index 000000000..5b8ae8081 --- /dev/null +++ b/src/r_drawt_rgba_sse.h @@ -0,0 +1,495 @@ +// +// SSE/AVX intrinsics based drawers for the r_drawt family of drawers. +// +// Note: This header file is intentionally not guarded by a __R_DRAWT_RGBA_SSE__ define. +// It is because the code is nearly identical for SSE vs AVX. The file is included +// multiple times by r_drawt_rgba.cpp with different defines that changes the class +// names outputted and the type of intrinsics used. 
+ +#ifdef _MSC_VER +#pragma warning(disable: 4752) // warning C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX +#endif + +class VecCommand(RtMap4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _destorg; + int _pitch; + BYTE * RESTRICT _colormap; + +public: + VecCommand(RtMap4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _light = dc_light; + _shade_constants = dc_shade_constants; + _destorg = dc_destorg; + _pitch = dc_pitch; + _colormap = dc_colormap; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + ShadeConstants shade_constants = _shade_constants; + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = thread->num_cores * 4; + + BYTE *colormap = _colormap; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], 
palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + if (count & 1) { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + source += sincr; + dest += pitch; + } + if (!(count >>= 1)) + return; + + do { + // shade_pal_index 0-3 + { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + } + + // shade_pal_index 4-7 (pitch) + { + uint32_t p0 = colormap[source[sincr]]; + uint32_t p1 = colormap[source[sincr + 1]]; + uint32_t p2 = colormap[source[sincr + 2]]; + uint32_t p3 = colormap[source[sincr + 3]]; + + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)(dest + pitch), fg); + } + + source += sincr * 2; + dest += pitch * 2; + } while (--count); + } + } +}; + +class VecCommand(RtAdd4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + 
ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + fixed_t _srcalpha; + fixed_t _destalpha; + +public: + VecCommand(RtAdd4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + BYTE *colormap = _colormap; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtShaded4colsRGBA) : public DrawerCommand +{ + int sx; + 
int yl; + int yh; + lighttable_t * RESTRICT _colormap; + int _color; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + +public: + VecCommand(RtShaded4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _colormap = dc_colormap; + _color = dc_color; + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + } + + void Execute(DrawerThread *thread) override + { + BYTE *colormap; + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + colormap = _colormap; + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i alpha_one = _mm_set1_epi16(64); + + do { + uint32_t p0 = colormap[source[0]]; + uint32_t p1 = colormap[source[1]]; + uint32_t p2 = colormap[source[2]]; + uint32_t p3 = colormap[source[3]]; + + __m128i alpha_hi = _mm_set_epi16(64, p3, p3, p3, 64, p2, p2, p2); + __m128i alpha_lo = _mm_set_epi16(64, p1, p1, p1, 64, p0, p0, p0); + __m128i inv_alpha_hi = _mm_subs_epu16(alpha_one, alpha_hi); + __m128i inv_alpha_lo = _mm_subs_epu16(alpha_one, alpha_lo); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * alpha + bg_red * inv_alpha) / 64: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_hi), _mm_mullo_epi16(bg_hi, inv_alpha_hi)), 6); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg, alpha_lo), _mm_mullo_epi16(bg_lo, inv_alpha_lo)), 6); + + __m128i color = 
_mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } +}; + +class VecCommand(RtAddClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtAddClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + ShadeConstants shade_constants = _shade_constants; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + 
__m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha + bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += 
sincr; + dest += pitch; + } while (--count); + } + } +}; From 38aba81dcc816ce9bb0888f95b94f73714771f67 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:11:41 +0200 Subject: [PATCH 060/100] Added more SSE drawers --- src/r_draw_rgba.cpp | 313 ++++++++++++---------- src/r_draw_rgba.h | 58 +++- src/r_draw_rgba_sse.h | 583 ++++++++++++++++++++++++++++++++++++++++- src/r_drawt_rgba.cpp | 14 + src/r_drawt_rgba_sse.h | 252 ++++++++++++++++++ 5 files changed, 1072 insertions(+), 148 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 28c5df2ac..96232ab0c 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2411,10 +2411,7 @@ public: do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - *dest = shade_bgra(pix, light, shade_constants); - } + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); frac += fracstep; dest += pitch; } while (--count); @@ -2480,10 +2477,10 @@ public: do { uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; if (pix) dest[0] = shade_bgra(pix, light0, shade_constants); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; if (pix) dest[1] = shade_bgra(pix, light1, shade_constants); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; if (pix) dest[2] = shade_bgra(pix, light2, shade_constants); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; if (pix) dest[3] = shade_bgra(pix, light3, shade_constants); local_vplce[3] = place + local_vince[3]; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; 
dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; dest += pitch; } while (--count); } @@ -2535,29 +2532,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | 
(green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2615,8 +2614,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2632,23 +2631,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ 
-2702,29 +2703,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2782,8 +2785,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t 
dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -2799,23 +2802,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); + uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); + uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -2869,29 +2874,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = 
_destalpha >> (FRACBITS - 8); do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -2949,8 +2956,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], 
vince[3] }; @@ -2966,23 +2973,25 @@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3036,29 +3045,31 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); 
do { uint32_t pix = source[frac >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light, shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } frac += fracstep; dest += pitch; } while (--count); @@ -3116,8 +3127,8 @@ public: ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; @@ -3133,23 +3144,25 
@@ public: for (int i = 0; i < 4; ++i) { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - if (pix != 0) - { - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; + uint32_t fg_alpha, bg_alpha; + calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t fg = shade_bgra(pix, light[i], shade_constants); + uint32_t fg_red = (fg >> 16) & 0xff; + uint32_t fg_green = (fg >> 8) & 0xff; + uint32_t fg_blue = fg & 0xff; + + uint32_t bg_red = (dest[i] >> 16) & 0xff; + uint32_t bg_green = (dest[i] >> 8) & 0xff; + uint32_t bg_blue = (dest[i]) & 0xff; + + uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + + dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } local_vplce[i] += local_vince[i]; } dest += pitch; @@ -3733,7 +3746,14 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3746,7 +3766,14 @@ fixed_t tmvline1_addclamp_rgba() void 
tmvline4_addclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3759,7 +3786,14 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -3772,7 +3806,14 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 174478162..66be1f38b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -417,9 +417,9 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) uint32_t bg_green = (bg >> 8) & 0xff; uint32_t bg_blue = bg & 0xff; - uint32_t red = ((fg_red * alpha) + (bg_red * inv_alpha)) / 256; - uint32_t green = ((fg_green * alpha) + (bg_green * inv_alpha)) / 256; - uint32_t blue = ((fg_blue * alpha) + (bg_blue * inv_alpha)) / 256; + uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); + uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); + uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } @@ -543,7 +543,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) /* // Complex shade 8 pixels -#define AVX2_SHADE(fg, shade_constants) { \ +#define AVX_SHADE(fg, shade_constants) { \ __m256i fg_hi = _mm256_unpackhi_epi8(fg, _mm256_setzero_si256()); \ __m256i fg_lo = _mm256_unpacklo_epi8(fg, 
_mm256_setzero_si256()); \ \ @@ -566,8 +566,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) } */ +// Normal premultiplied alpha blend using the alpha from fg +#define VEC_ALPHA_BLEND(fg,bg) { \ + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); \ + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ + inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ + inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ + bg_hi = _mm_srli_epi16(bg_hi, 8); \ + bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ + bg_lo = _mm_srli_epi16(bg_lo, 8); \ + bg = _mm_packus_epi16(bg_lo, bg_hi); \ + fg = _mm_adds_epu8(fg, bg); \ +} +/* +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = src_alpha; + bg_alpha = dest_alpha; +} +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = msrc_alpha; \ + __m128i fg_alpha_lo = msrc_alpha; \ + __m128i bg_alpha_hi = mdest_alpha; \ + __m128i bg_alpha_lo = mdest_alpha; +*/ + +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +{ + fg_alpha = (fg >> 24) & 0xff; + fg_alpha += fg_alpha >> 7; + bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; + fg_alpha = (src_alpha * fg_alpha) >> 8; +} 
+ +// Calculates the final alpha values to be used when combined with the source texture alpha channel +#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ + __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ + fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ + __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ + __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ + fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ + fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 14ebbbb41..0597580e1 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -444,17 +444,16 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE_SIMPLE(fg); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + 
_mm_storeu_si128((__m128i*)dest, fg); dest += pitch; } while (--count); } @@ -473,17 +472,585 @@ public: uint32_t pix2 = bufplce[2][place2 >> bits]; uint32_t pix3 = bufplce[3][place3 >> bits]; - // movemask = !(pix == 0) - __m128i movemask = _mm_xor_si128(_mm_cmpeq_epi32(_mm_set_epi32(pix3, pix2, pix1, pix0), _mm_setzero_si128()), _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128())); - local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; local_vplce[2] = place2 + local_vince[2]; local_vplce[3] = place3 + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); VEC_SHADE(fg, shade_constants); - _mm_maskmoveu_si128(fg, movemask, (char*)dest); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 * RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = 
calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = 
_mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4AddClampRGBA)() + { + _dest = dc_dest; + _count = 
dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + 
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), 
_mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4SubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = _pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] 
* skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + 
local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand +{ + BYTE * RESTRICT _dest; + int _count; + int _pitch; + ShadeConstants _shade_constants; + fixed_t _srcalpha; + fixed_t _destalpha; + int tmvlinebits; + fixed_t palookuplight[4]; + DWORD vplce[4]; + DWORD vince[4]; + const uint32 *RESTRICT bufplce[4]; + +public: + VecCommand(Tmvline4RevSubClampRGBA)() + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + tmvlinebits = ::tmvlinebits; + for (int i = 0; i < 4; i++) + { + palookuplight[i] = ::palookuplight[i]; + vplce[i] = ::vplce[i]; + vince[i] = ::vince[i]; + bufplce[i] = (const uint32 *)::bufplce[i]; + } + } + + void Execute(DrawerThread *thread) override + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); + int pitch = 
_pitch * thread->num_cores; + int bits = tmvlinebits; + + uint32_t light[4]; + light[0] = calc_light_multiplier(palookuplight[0]); + light[1] = calc_light_multiplier(palookuplight[1]); + light[2] = calc_light_multiplier(palookuplight[2]); + light[3] = calc_light_multiplier(palookuplight[3]); + + ShadeConstants shade_constants = _shade_constants; + + uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); + + DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; + DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; + int skipped = thread->skipped_by_thread(_dest_y); + for (int i = 0; i < 4; i++) + { + local_vplce[i] += local_vince[i] * skipped; + local_vince[i] *= thread->num_cores; + } + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE_SIMPLE(fg); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i 
out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); + + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + + do + { + uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; + uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; + uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; + uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_SHADE(fg, shade_constants); + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)dest, out); dest += pitch; } while (--count); } diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 4da963430..1e1236f0e 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1436,7 +1436,14 @@ void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans to the screen starting at 
sx with clamping. void rt_subclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -1462,7 +1469,14 @@ void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { +#ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); +#else + if (!r_linearlight) + DrawerCommandQueue::QueueCommand(sx, yl, yh); + else + DrawerCommandQueue::QueueCommand(sx, yl, yh); +#endif } // Translates and subtracts one span at hx from the screen at sx with clamping. diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 5b8ae8081..684be2b6a 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -493,3 +493,255 @@ public: } } }; + +class VecCommand(RtSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 
* thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = (uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; 
+ + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (bg_red * bg_alpha - fg_red * fg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, mbg_alpha), _mm_mullo_epi16(fg_hi, mfg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, mbg_alpha), _mm_mullo_epi16(fg_lo, mfg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; + +class VecCommand(RtRevSubClamp4colsRGBA) : public DrawerCommand +{ + int sx; + int yl; + int yh; + BYTE * RESTRICT _destorg; + int _pitch; + fixed_t _light; + fixed_t _srcalpha; + fixed_t _destalpha; + ShadeConstants _shade_constants; + +public: + VecCommand(RtRevSubClamp4colsRGBA)(int sx, int yl, int yh) + { + this->sx = sx; + this->yl = yl; + this->yh = yh; + + _destorg = dc_destorg; + _pitch = dc_pitch; + _light = dc_light; + _srcalpha = dc_srcalpha; + _destalpha = dc_destalpha; + _shade_constants = dc_shade_constants; + } + + void Execute(DrawerThread *thread) override + { + uint32_t *source; + uint32_t *dest; + int count; + int pitch; + int sincr; + + count = thread->count_for_thread(yl, yh - yl + 1); + if (count <= 0) + return; + + dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); + source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; + pitch = _pitch * thread->num_cores; + sincr = 4 * thread->num_cores; + + uint32_t light = calc_light_multiplier(_light); + uint32_t *palette = 
(uint32_t*)GPalette.BaseColors; + ShadeConstants shade_constants = _shade_constants; + + uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); + uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], palette[p0]); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); + __m128i mbg_alpha = _mm_set_epi16(256, bg_alpha, bg_alpha, bg_alpha, 256, bg_alpha, bg_alpha, bg_alpha); + + do { + uint32_t p0 = source[0]; + uint32_t p1 = source[1]; + uint32_t p2 = source[2]; + uint32_t p3 = source[3]; + + // shade_pal_index: + __m128i fg = _mm_set_epi32(palette[p3], palette[p2], palette[p1], 
palette[p0]); + VEC_SHADE(fg, shade_constants); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + + // unpack bg: + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + // (fg_red * fg_alpha - bg_red * bg_alpha) / 256: + __m128i color_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, mfg_alpha), _mm_mullo_epi16(bg_hi, mbg_alpha)), 8); + __m128i color_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, mfg_alpha), _mm_mullo_epi16(bg_lo, mbg_alpha)), 8); + + __m128i color = _mm_packus_epi16(color_lo, color_hi); + _mm_storeu_si128((__m128i*)dest, color); + + source += sincr; + dest += pitch; + } while (--count); + } + } +}; From e72a032a114c7710112534157abad8fa300c2f7d Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:12:10 +0200 Subject: [PATCH 061/100] Fixed alpha channel issue with textures --- src/textures/pngtexture.cpp | 10 ++++++++-- src/textures/texture.cpp | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 206797a34..408cf1e2f 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -724,8 +724,9 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { + // output as premultiplied alpha uint32_t alpha = in[1]; - uint32_t gray = in[0]; + uint32_t gray = (in[0] * alpha + 127) / 255; *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; in += pitch; } @@ -740,7 +741,12 @@ void FPNGTexture::MakeTextureBgra () { for (y = Height; y > 0; --y) { - *out++ = (((uint32_t)in[3]) << 24) | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); + // output as premultiplied alpha + uint32_t alpha = in[3]; + uint32_t red = (in[0] * alpha + 127) / 255; + uint32_t green = (in[1] * alpha + 127) / 
255; + uint32_t blue = (in[2] * alpha + 127) / 255; + *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; in += pitch; } in -= backstep; diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 0030719cb..da5dd8ad7 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,7 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = GPalette.BaseColors[indices[i]].d; + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; } } return PixelsBgra.data(); From d3bc68a160be4b6549f68454b308ad66c62e1d50 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 19 Jun 2016 23:37:22 +0200 Subject: [PATCH 062/100] Disabled the AVX intrinsics --- src/r_draw_rgba.cpp | 40 +++++++++------------------------------- src/r_drawt_rgba.cpp | 36 ++++++++---------------------------- 2 files changed, 17 insertions(+), 59 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 96232ab0c..b437fbe00 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,6 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_linearlight, false, 0) #ifndef NO_SSE @@ -71,7 +70,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_draw_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -88,7 +87,7 @@ CVAR(Bool, r_linearlight, false, 0) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_draw_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -3652,10 +3651,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif } 
@@ -3709,10 +3705,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3729,10 +3722,7 @@ void mvlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3749,10 +3739,7 @@ void tmvline4_add_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3769,10 +3756,7 @@ void tmvline4_addclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3789,10 +3773,7 @@ void tmvline4_subclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; @@ -3809,10 +3790,7 @@ void tmvline4_revsubclamp_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 1e1236f0e..e239674e8 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -51,8 +51,6 @@ extern unsigned int dc_tspans[4][MAXHEIGHT]; extern unsigned int *dc_ctspan[4]; extern unsigned int *horizspan[4]; -EXTERN_CVAR(Bool, r_linearlight) - #ifndef NO_SSE // Generate SSE drawers: @@ -64,7 +62,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 SSE_SHADE_INIT4 #define VEC_SHADE SSE_SHADE #include "r_drawt_rgba_sse.h" - +/* // Generate AVX drawers: #undef VecCommand #undef VEC_SHADE_SIMPLE_INIT @@ -81,7 +79,7 @@ EXTERN_CVAR(Bool, r_linearlight) #define VEC_SHADE_INIT4 AVX_LINEAR_SHADE_INIT4 #define VEC_SHADE AVX_LINEAR_SHADE #include "r_drawt_rgba_sse.h" - +*/ #endif ///////////////////////////////////////////////////////////////////////////// @@ -1311,10 +1309,7 @@ void rt_map4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1354,10 +1349,7 @@ void rt_add4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1387,10 +1379,7 @@ void rt_shaded4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1406,10 +1395,7 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + 
DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1439,10 +1425,7 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } @@ -1472,10 +1455,7 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) #ifdef NO_SSE DrawerCommandQueue::QueueCommand(sx, yl, yh); #else - if (!r_linearlight) - DrawerCommandQueue::QueueCommand(sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(sx, yl, yh); + DrawerCommandQueue::QueueCommand(sx, yl, yh); #endif } From 6daeb5a15881c2198af31cb564c23d6090f026d4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 02:36:54 +0200 Subject: [PATCH 063/100] Blend mode fixes --- src/r_draw_rgba.cpp | 32 +++++++-------- src/r_draw_rgba.h | 87 ++++++++++++++++++---------------------- src/r_draw_rgba_sse.h | 48 ++++++++-------------- src/textures/texture.cpp | 5 ++- 4 files changed, 76 insertions(+), 96 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index b437fbe00..f317a34d6 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2538,8 +2538,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2631,8 +2631,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2709,8 
+2709,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2802,8 +2802,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2880,8 +2880,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -2973,8 +2973,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3051,8 +3051,8 @@ public: { uint32_t pix = source[frac >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; @@ -3144,8 +3144,8 @@ public: { uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - uint32_t fg_alpha, bg_alpha; - calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); + uint32_t fg_alpha = src_alpha; + 
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 66be1f38b..2527e84a6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -339,6 +339,7 @@ FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; uint32_t red = color.r; uint32_t green = color.g; uint32_t blue = color.b; @@ -367,11 +368,12 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) { + uint32_t alpha = color & 0xff000000; uint32_t red = (color >> 16) & 0xff; uint32_t green = (color >> 8) & 0xff; uint32_t blue = color & 0xff; @@ -400,12 +402,12 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst green = (green * constants.light_green) / 256; blue = (blue * constants.light_blue) / 256; } - return 0xff000000 | (red << 16) | (green << 8) | blue; + return alpha | (red << 16) | (green << 8) | blue; } FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) { - uint32_t fg_alpha = (fg >> 24) & 0xff; + uint32_t fg_alpha = fg >> 24; uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -468,11 +470,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = _mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -488,11 +490,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \ __m256 color = _mm256_set_ps( \ - shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ - shade_constants.light_alpha * (1.0f/256.0f), 
shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ + 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ __m256 fade = _mm256_set_ps( \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ - shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ + 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ @@ -585,39 +587,30 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) fg = _mm_adds_epu8(fg, bg); \ } -/* -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) +// Calculates the final alpha values to be used when combined with the source texture alpha channel +FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { - fg_alpha = src_alpha; - bg_alpha = dest_alpha; + uint32_t alpha = fg >> 24; + alpha += alpha >> 7; + return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; } -#define VEC_CALC_BLEND_ALPHA(fg, 
msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = msrc_alpha; \ - __m128i fg_alpha_lo = msrc_alpha; \ - __m128i bg_alpha_hi = mdest_alpha; \ - __m128i bg_alpha_lo = mdest_alpha; -*/ +#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ + __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); // Calculates the final alpha values to be used when combined with the source texture alpha channel -FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) -{ - fg_alpha = (fg >> 24) & 0xff; - fg_alpha += fg_alpha >> 7; - bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; - fg_alpha = (src_alpha * fg_alpha) >> 8; -} - -// Calculates the final alpha values to be used when combined with the source texture alpha channel -#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ - __m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - __m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ - fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \ - fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \ - __m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \ - __m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \ - fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \ - fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8); +#define VEC_CALC_BLEND_ALPHA(fg) \ + __m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \ + __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), 
_MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ + bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ \ + fg_alpha_hi = msrc_alpha; \ + fg_alpha_lo = msrc_alpha; \ + } // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ @@ -645,11 +638,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ __m128i mlight_lo = mlight_hi; \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, 
_mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = fade_amount_hi; \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ @@ -659,11 +652,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ __m128i color = _mm_set_epi16( \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ - shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ + 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ __m128i fade = _mm_set_epi16( \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ - shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ + 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 0597580e1..220638c75 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -554,9 +554,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = 
_mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -572,7 +570,7 @@ public: __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -593,9 +591,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -610,7 +606,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -697,9 +693,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -714,7 +708,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -735,9 +729,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -752,7 +744,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = 
_mm_loadu_si128((const __m128i*)dest); @@ -839,9 +831,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -856,7 +846,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -877,9 +867,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -894,7 +882,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -981,9 +969,7 @@ public: if (shade_constants.simple_shade) { VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -998,7 +984,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE_SIMPLE(fg); __m128i bg = _mm_loadu_si128((const __m128i*)dest); @@ -1019,9 +1005,7 @@ public: else { VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + 
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); do { @@ -1036,7 +1020,7 @@ public: local_vplce[3] = local_vplce[3] + local_vince[3]; __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); + VEC_CALC_BLEND_ALPHA(fg); VEC_SHADE(fg, shade_constants); __m128i bg = _mm_loadu_si128((const __m128i*)dest); diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index da5dd8ad7..16a9e63a6 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -203,7 +203,10 @@ const uint32_t *FTexture::GetPixelsBgra() PixelsBgra.resize(Width * Height); for (int i = 0; i < Width * Height; i++) { - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + if (indices[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; + else + PixelsBgra[i] = 0; } } return PixelsBgra.data(); From c70aa1fe99657e053e3b0aa1a9d00b307ab54bca Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 20 Jun 2016 08:24:02 +0200 Subject: [PATCH 064/100] Added bilinear filtering --- src/r_draw.cpp | 4 ++ src/r_draw.h | 36 ++++++++++ src/r_draw_rgba.cpp | 158 ++++++++++++++++++++++++++++++++------------ src/r_draw_rgba.h | 52 +++++++++++++++ src/r_segs.cpp | 54 +++++++++------ 5 files changed, 242 insertions(+), 62 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 7829e2b77..83c4ac8d4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -162,6 +162,8 @@ fixed_t dc_destalpha; // Alpha value used by dc_destblend // first pixel in a column (possibly virtual) const BYTE* dc_source; +const BYTE* dc_source2; +uint32_t dc_texturefracx; BYTE* dc_dest; int dc_count; @@ -171,6 +173,8 @@ DWORD vince[4]; BYTE* palookupoffse[4]; fixed_t palookuplight[4]; const BYTE* bufplce[4]; +const BYTE* bufplce2[4]; +uint32_t buftexturefracx[4]; // just for profiling int dccount; diff --git a/src/r_draw.h b/src/r_draw.h index 99ee4d10d..d5ecbd289 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -71,6 +71,8 @@ extern "C" 
fixed_t dc_destalpha; // first pixel in a column extern "C" const BYTE* dc_source; +extern "C" const BYTE* dc_source2; +extern "C" uint32_t dc_texturefracx; extern "C" BYTE *dc_dest, *dc_destorg; extern "C" int dc_count; @@ -80,6 +82,8 @@ extern "C" DWORD vince[4]; extern "C" BYTE* palookupoffse[4]; extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; +extern "C" const BYTE* bufplce2[4]; +extern "C" uint32_t buftexturefracx[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -374,4 +378,36 @@ void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); +extern bool r_swtruecolor; +EXTERN_CVAR(Bool, r_bilinear); + +// Texture sampler state needed for bilinear filtering +struct SamplerSetup +{ + SamplerSetup() { } + SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; + source = getcol(texture, tx); + source2 = getcol(texture, tx + 1); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index f317a34d6..d85d9994b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,6 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) +CVAR(Bool, r_bilinear, false, 0) #ifndef NO_SSE @@ -1547,41 +1548,72 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants 
shade_constants = _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + // 64x64 is the most common case by far, so special case it. - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + do + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + + do + { + // Current texture index in u,v. + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); + + // Next step in u,v. + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - do + if (_xbits == 6 && _ybits == 6) { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + // 64x64 is the most common case by far, so special case it. - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2253,6 +2285,8 @@ class Vlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int vlinebits; int _pitch; @@ -2266,6 +2300,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; vlinebits = ::vlinebits; _pitch = dc_pitch; @@ -2282,6 +2318,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = vlinebits; int pitch = _pitch * thread->num_cores; @@ -2289,12 +2327,24 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + *dest = shade_bgra(source[frac >> bits], light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2308,7 +2358,9 @@ class Vlinec4RGBACommand : public 
DrawerCommand fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Vlinec4RGBACommand() @@ -2323,7 +2375,9 @@ public: palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce[i] = (const uint32_t *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2354,14 +2408,28 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + { + dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, 
shade_constants); local_vplce[0] = place + local_vince[0]; + dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; + dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; + dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3651,7 +3719,10 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif } @@ -3705,7 +3776,10 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - DrawerCommandQueue::QueueCommand(); + if (!r_bilinear) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 2527e84a6..a266ce878 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -426,6 +426,58 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) +{ + uint32_t half = 1 << (ybits - 1); + uint32_t y = (texturefracy - half) >> ybits; + + uint32_t p00 = col0[y]; + uint32_t p01 = col0[y + 1]; + uint32_t p10 = col1[y]; + uint32_t p11 = col1[y + 1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = 
(RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; +} + +FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green 
<< 8) | blue; +} + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index ad242b2f9..d71487bb9 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -58,6 +58,8 @@ CVAR(Bool, r_np2, true, 0) +EXTERN_CVAR(Bool, r_bilinear) + //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1066,14 +1068,16 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) } // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const BYTE *source, DWORD(*draw1column)()) +uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two { int count = y2 - y1; - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1097,7 +1101,9 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); - dc_source = source; + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; dc_iscale = uv_step; @@ -1115,7 +1121,7 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const BYTE **source, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) { int pixelsize = r_swtruecolor ? 
4 : 1; if (uv_max == 0) // power of two, no wrap handling needed @@ -1123,7 +1129,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste int count = y2 - y1; for (int i = 0; i < 4; i++) { - bufplce[i] = source[i]; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = uv_pos[i]; vince[i] = uv_step[i]; @@ -1139,7 +1147,11 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste { dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; for (int i = 0; i < 4; i++) - bufplce[i] = source[i]; + { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; + } uint32_t left = y2 - y1; while (left > 0) @@ -1249,12 +1261,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } // The aligned columns @@ -1264,10 +1275,6 @@ void wallscan_any( int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - const BYTE *source[4]; - for (int i = 0; i < 4; i++) - source[i] = getcol(rw_pic, (lwal[x + i] + xoffset) >> FRACBITS); - float lights[4]; for (int i = 0; i < 4; i++) { @@ -1276,8 +1283,16 @@ void wallscan_any( } uint32_t uv_pos[4], uv_step[4]; + int magnifying = 0; for (int i = 0; i < 4; i++) + { calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); + magnifying |= uv_step[i] >> (fracbits - 1); + } + + SamplerSetup 
sampler[4]; + for (int i = 0; i < 4; i++) + sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,7 +1320,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } continue; } @@ -1317,7 +1332,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1337,7 +1352,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, source, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, uv_max, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1346,7 +1361,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, source[i], draw1column); + uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); } } @@ -1361,12 +1376,11 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - const BYTE *source = getcol(rw_pic, (lwal[x] + xoffset) >> FRACBITS); - uint32_t uv_start, uv_step; calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, source, draw1column); + 
SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); } NetUpdate (); From c1b5ba5b9064997cbe9802f1b5df59a88231d4e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 06:22:43 +0200 Subject: [PATCH 065/100] Added SSE versions of bilinear filtering --- src/r_draw_rgba.cpp | 163 +++++++---- src/r_draw_rgba.h | 82 ++++++ src/r_draw_rgba_sse.h | 657 +++++++++++++++++++++++++++--------------- 3 files changed, 611 insertions(+), 291 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index d85d9994b..869edaba1 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -58,7 +58,7 @@ extern float rw_lightstep; extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, false, 0) +CVAR(Bool, r_bilinear, true, 0) #ifndef NO_SSE @@ -1680,43 +1680,70 @@ public: xstep = _xstep; ystep = _ystep; - if (_xbits == 6 && _ybits == 6) - { - // 64x64 is the most common case by far, so special case it. - do - { - uint32_t texdata; + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) + { + if (_xbits == 6 && _ybits == 6) + { + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t texdata; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + uint32_t texdata; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + texdata = source[spot]; + *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); + dest++; + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do + if (_xbits == 6 && _ybits == 6) { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) + // 64x64 is the most common case by far, so special case it. 
+ do { - *dest = shade_bgra(texdata, light, shade_constants); - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } + else + { + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; + do + { + *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + xfrac += xstep; + yfrac += ystep; + } while (--count); + } } } }; @@ -2439,6 +2466,8 @@ class Mvlinec1RGBACommand : public DrawerCommand DWORD _texturefrac; int _count; const BYTE * RESTRICT _source; + const BYTE * RESTRICT _source2; + uint32_t _texturefracx; BYTE * RESTRICT _dest; int mvlinebits; int _pitch; @@ -2452,6 +2481,8 @@ public: _texturefrac = dc_texturefrac; _count = dc_count; _source = dc_source; + _source2 = dc_source2; + _texturefracx = dc_texturefracx; _dest = dc_dest; mvlinebits = ::mvlinebits; _pitch = dc_pitch; @@ -2468,6 +2499,8 @@ public: DWORD fracstep = _iscale * thread->num_cores; DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); const uint32 *source = (const uint32 *)_source; + const uint32 *source2 = (const uint32 *)_source2; + uint32_t texturefracx = _texturefracx; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int bits = mvlinebits; int pitch = _pitch * thread->num_cores; @@ -2475,13 +2508,25 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - do + if (_source2 == nullptr) { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + do + { + uint32_t pix = source[frac >> bits]; + *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); + frac += 
fracstep; + dest += pitch; + } while (--count); + } + else + { + do + { + *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); + frac += fracstep; + dest += pitch; + } while (--count); + } } }; @@ -2496,6 +2541,8 @@ class Mvlinec4RGBACommand : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: Mvlinec4RGBACommand() @@ -2511,6 +2558,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -2541,15 +2590,29 @@ public: local_vince[i] *= thread->num_cores; } - do + if (bufplce2[0] == nullptr) { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + do + { + uint32_t pix; + pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = 
alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } + else + { + do + { + dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; + dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; + dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; + dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; + dest += pitch; + } while (--count); + } } }; @@ -3719,10 +3782,7 @@ void R_DrawSpan_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif } @@ -3776,10 +3836,7 @@ void vlinec4_rgba() #ifdef NO_SSE DrawerCommandQueue::QueueCommand(); #else - if (!r_bilinear) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); #endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a266ce878..0900e8997 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -478,6 +478,88 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d return 
(alpha << 24) | (red << 16) | (green << 8) | blue; } +#ifndef NO_SSE +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) +{ + uint32_t half = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t y = (texturefracy[i] - half) >> ybits; + + uint32_t inv_b = texturefracx[i]; + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + } + return fg; +} + +FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) +{ + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + + __m128i m127 = _mm_set1_epi16(127); + __m128i fg = _mm_setzero_si128(); + for (int i = 0; i < 4; i++) + { + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + 
uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t ab = a * b; + uint32_t invab = inv_a * b; + uint32_t ainvb = a * inv_b; + uint32_t invainvb = inv_a * inv_b; + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); + + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); + + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); + + xfrac += xstep; + yfrac += ystep; + } + return fg; +} +#endif + // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ __m256 mlight_hi = _mm256_set_ps(1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f), 1.0f, light * (1.0f/256.0f), light * (1.0f/256.0f), light * (1.0f/256.0f)); \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 220638c75..721471724 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -71,195 +71,284 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_xbits == 6 && _ybits == 6) + fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - 
FRACBITS); + fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); + fixed_t magnitude = xmagnitude + ymagnitude; + + bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; + if (magnifying) { - // 64x64 is the most common case by far, so special case it. - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + // Current texture index in u,v. + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + // Current texture index in u,v. 
+ spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile, + // re-index using light/colormap. + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + + // Next step in u,v. + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
- dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + BYTE yshift = 32 - _ybits; + BYTE xshift = yshift - _xbits; + int xmask = ((1 << _xbits) - 1) << _ybits; - while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) + { + VEC_SHADE_SIMPLE_INIT(light); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + else + { + VEC_SHADE_INIT(light, shade_constants); + + while (sse_count--) + { + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p0 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p1 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p2 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + uint32_t p3 = source[spot]; + xfrac += xstep; + yfrac += ystep; + + // Lookup pixel from flat texture tile + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do { // Current texture index in u,v. 
- spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; + spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile, - // re-index using light/colormap. - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); + // Lookup pixel from flat texture tile + *dest++ = shade_bgra(source[spot], light, shade_constants); // Next step in u,v. - dest += 4; - } + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - - int sse_count = count / 4; - count -= sse_count * 4; - - if (shade_constants.simple_shade) + if (_xbits == 6 && _ybits == 6) { - VEC_SHADE_SIMPLE_INIT(light); + // 64x64 is the most common case by far, so special case it. 
- while (sse_count--) + int sse_count = count / 4; + count -= sse_count * 4; + + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } else { - VEC_SHADE_INIT(light, shade_constants); + int sse_count = count / 4; + count -= sse_count * 4; - while (sse_count--) + if (shade_constants.simple_shade) { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p0 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p1 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p2 = source[spot]; - xfrac += xstep; - yfrac += ystep; - 
- spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - uint32_t p3 = source[spot]; - xfrac += xstep; - yfrac += ystep; - - // Lookup pixel from flat texture tile - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += 4; + VEC_SHADE_SIMPLE_INIT(light); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } } + else + { + VEC_SHADE_INIT(light, shade_constants); + while (sse_count--) + { + __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += 4; + } + } + + if (count == 0) + return; + + do + { + *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + xfrac += xstep; + yfrac += ystep; + } while (--count); } - - if (count == 0) - return; - - do - { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); } } }; @@ -275,6 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32_t * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Vlinec4RGBA)() @@ -290,6 +381,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -319,57 +412,97 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + 
VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t p0 = bufplce[0][place0 >> bits]; + uint32_t p1 = bufplce[1][place1 >> bits]; + uint32_t p2 = bufplce[2][place2 >> bits]; + uint32_t p3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(p3, p2, p1, p0); + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); 
+ do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + VEC_SHADE(fg, shade_constants); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; @@ -385,6 +518,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + const uint32 * RESTRICT bufplce2[4]; + uint32_t buftexturefracx[4]; public: VecCommand(Mvlinec4RGBA)() @@ -400,6 +535,8 @@ public: vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufplce2[i] = (const uint32_t *)::bufplce2[i]; + buftexturefracx[i] = ::buftexturefracx[i]; } } @@ -429,61 +566,105 @@ public: local_vince[i] *= thread->num_cores; } - if (shade_constants.simple_shade) + if (bufplce2[0] == nullptr) { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; - 
local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + DWORD place0 = local_vplce[0]; + DWORD place1 = local_vplce[1]; + DWORD place2 = local_vplce[2]; + DWORD place3 = local_vplce[3]; + + uint32_t pix0 = bufplce[0][place0 >> bits]; + uint32_t pix1 = bufplce[1][place1 >> bits]; + uint32_t pix2 = bufplce[2][place2 >> bits]; + uint32_t pix3 = bufplce[3][place3 >> bits]; + + local_vplce[0] = place0 + local_vince[0]; + local_vplce[1] = place1 + local_vince[1]; + local_vplce[2] = place2 + local_vince[2]; + local_vplce[3] = place3 + local_vince[3]; + + __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } else { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do + if (shade_constants.simple_shade) { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; + VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); + do + { + __m128i fg = 
sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } + else + { + VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); + do + { + __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); + local_vplce[0] = local_vplce[0] + local_vince[0]; + local_vplce[1] = local_vplce[1] + local_vince[1]; + local_vplce[2] = local_vplce[2] + local_vince[2]; + local_vplce[3] = local_vplce[3] + local_vince[3]; + + __m128i bg = _mm_loadu_si128((const __m128i*)dest); + VEC_SHADE(fg, shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)dest, fg); + dest += pitch; + } while (--count); + } } } }; From d15af1524cebd3e000bbd7971d9b5e51205cfde6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 09:38:47 +0200 Subject: [PATCH 066/100] Added mipmap support for floor and ceiling --- src/r_draw.cpp | 6 +-- src/r_draw.h | 2 +- src/r_draw_rgba.cpp | 119 +++++++++++++++++++++++++++++++++++++----- 
src/r_draw_rgba.h | 33 ++++++++++++ src/r_draw_rgba_sse.h | 9 ++-- src/r_plane.cpp | 5 +- src/r_swrenderer.cpp | 1 + src/v_draw.cpp | 2 +- 8 files changed, 150 insertions(+), 27 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 83c4ac8d4..73ddb72f8 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1062,13 +1062,13 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; // //========================================================================== -void R_SetSpanSource(const BYTE *pixels) +void R_SetSpanSource(FTexture *tex) { - ds_source = pixels; + R_SetMipmappedSpanSource(tex); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { - R_SetSpanSource_ASM(pixels); + R_SetSpanSource_ASM(ds_source); } #endif } diff --git a/src/r_draw.h b/src/r_draw.h index d5ecbd289..b662ddcee 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -122,7 +122,7 @@ extern void (*R_DrawTranslatedColumn)(void); extern void (*R_DrawSpan)(void); void R_SetupSpanBits(FTexture *tex); void R_SetSpanColormap(FDynamicColormap *colormap, int shade); -void R_SetSpanSource(const BYTE *pixels); +void R_SetSpanSource(FTexture *tex); // Span drawing for masked textures. 
extern void (*R_DrawSpanMasked)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 869edaba1..9cdcdbf80 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -59,6 +59,7 @@ extern int wallshade; CVAR(Bool, r_multithreaded, true, 0) CVAR(Bool, r_bilinear, true, 0) +CVAR(Bool, r_mipmap, true, 0) #ifndef NO_SSE @@ -1502,6 +1503,7 @@ class DrawSpanRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: DrawSpanRGBACommand() @@ -1519,6 +1521,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1548,12 +1551,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { @@ -1634,6 +1632,7 @@ class DrawSpanMaskedRGBACommand : public DrawerCommand fixed_t _ystep; int _xbits; int _ybits; + bool _magnifying; public: DrawSpanMaskedRGBACommand() @@ -1651,6 +1650,7 @@ public: _ystep = ds_ystep; _xbits = ds_xbits; _ybits = ds_ybits; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -1680,12 +1680,7 @@ public: xstep = _xstep; ystep = _ystep; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && 
_ybits == 6) { @@ -3677,6 +3672,106 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// +#include + +class MipmappedTexture +{ +public: + MipmappedTexture(FTexture *texture) + { + const uint32_t *base_texture = texture->GetPixelsBgra(); + Width = texture->GetWidth(); + Height = texture->GetHeight(); + Levels = MAX(texture->WidthBits, texture->HeightBits); + + // I bet there is a better way to calculate this.. + int buffersize = 0; + for (int i = 0; i < Levels; i++) + { + int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks + int h = MAX(Height >> i, 2); + buffersize += w * h; + } + Pixels.resize(buffersize); + + // Base level: + memcpy(Pixels.data(), base_texture, Width * Height * 4); + + // Mipmap levels: + uint32_t *src = Pixels.data(); + uint32_t *dest = src + Width * Height; + for (int i = 1; i < Levels; i++) + { + int srch = MAX(Height >> (i - 1), 2); + int w = MAX(Width >> i, 2); + int h = MAX(Height >> i, 2); + + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + uint32_t src00 = src[y * 2 + x * 2 * srch]; + uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; + uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; + uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } + } + + int Width = 0; + int Height = 0; + int Levels = 0; + std::vector Pixels; +}; + +class TextureMipmapper +{ +public: + static std::map> &Textures() + { + static std::map> textures; + return 
textures; + } +}; + +void R_SetMipmappedSpanSource(FTexture *tex) +{ + if (r_swtruecolor) + { + if (r_mipmap) + { + auto &mipmap = TextureMipmapper::Textures()[tex]; + if (!mipmap) + mipmap = std::make_shared(tex); + ds_source = (const BYTE*)mipmap->Pixels.data(); + } + else + { + ds_source = (const BYTE*)tex->GetPixelsBgra(); + } + } + else + { + ds_source = tex->GetPixels(); + } +} + +void R_ClearMipmapCache() +{ + TextureMipmapper::Textures().clear(); +} + void R_BeginDrawerCommands() { DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 0900e8997..37dc1a70a 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,6 +108,9 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); +void R_SetMipmappedSpanSource(FTexture *tex); +void R_ClearMipmapCache(); + ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -185,6 +188,7 @@ public: }; EXTERN_CVAR(Bool, r_multithreaded) +EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue @@ -426,6 +430,35 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } +inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +{ + if (!r_bilinear) + return false; + + // Is this a magfilter or minfilter? 
+ fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + if (magnitude >> FRACBITS == 0) + return false; + + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; + + source += (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; +} + FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) { uint32_t half = 1 << (ybits - 1); diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 721471724..4002a5535 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,6 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; + bool _magnifying; public: VecCommand(DrawSpanRGBA)() @@ -42,6 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; + _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -71,12 +73,7 @@ public: uint32_t light = calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - fixed_t xmagnitude = abs((fixed_t)xstep) >> (32 - _xbits - FRACBITS); - fixed_t ymagnitude = abs((fixed_t)ystep) >> (32 - _ybits - FRACBITS); - fixed_t magnitude = xmagnitude + ymagnitude; - - bool magnifying = !r_bilinear || magnitude >> (FRACBITS - 1) == 0; - if (magnifying) + if (_magnifying) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 807066f77..6913db918 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -1178,10 +1178,7 @@ void R_DrawSinglePlane (visplane_t *pl, fixed_t alpha, bool additive, bool maske R_SetupSpanBits(tex); double xscale = pl->xform.xScale 
* tex->Scale.X; double yscale = pl->xform.yScale * tex->Scale.Y; - if (r_swtruecolor) - ds_source = (const BYTE*)tex->GetPixelsBgra(); - else - ds_source = tex->GetPixels(); + R_SetSpanSource(tex); basecolormap = pl->colormap; planeshade = LIGHT2SHADE(pl->lightlevel); diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c81d2a110..c1e2d4bd0 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,6 +170,7 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } + R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 6a8dad047..fd12a1587 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1404,7 +1404,7 @@ void DCanvas::FillSimplePoly(FTexture *tex, FVector2 *points, int npoints, R_SetSpanColormap(colormap, clamp(shade >> FRACBITS, 0, NUMCOLORMAPS - 1)); else R_SetSpanColormap(&identitycolormap, 0); - R_SetSpanSource(r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels()); + R_SetSpanSource(tex); scalex = double(1u << (32 - ds_xbits)) / scalex; scaley = double(1u << (32 - ds_ybits)) / scaley; ds_xstep = xs_RoundToInt(cosrot * scalex); From c235de5c22f6f7aebbf36aa3f80a45e0f5f6accf Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 21:55:08 +0200 Subject: [PATCH 067/100] Native mipmap support to FTexture --- src/r_draw.cpp | 2 +- src/r_draw_rgba.cpp | 100 ---------------------- src/r_draw_rgba.h | 157 +++++++++++++++++------------------ src/r_draw_rgba_sse.h | 28 +++++-- src/r_swrenderer.cpp | 1 - src/textures/jpegtexture.cpp | 4 +- src/textures/pngtexture.cpp | 3 +- src/textures/texture.cpp | 68 ++++++++++++++- src/textures/textures.h | 4 + 9 files changed, 171 insertions(+), 196 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 73ddb72f8..55353a006 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1064,7 +1064,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { - R_SetMipmappedSpanSource(tex); + ds_source = r_swtruecolor ? (const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9cdcdbf80..8144c096d 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -3672,106 +3672,6 @@ void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) ///////////////////////////////////////////////////////////////////////////// -#include - -class MipmappedTexture -{ -public: - MipmappedTexture(FTexture *texture) - { - const uint32_t *base_texture = texture->GetPixelsBgra(); - Width = texture->GetWidth(); - Height = texture->GetHeight(); - Levels = MAX(texture->WidthBits, texture->HeightBits); - - // I bet there is a better way to calculate this.. 
- int buffersize = 0; - for (int i = 0; i < Levels; i++) - { - int w = MAX(Width >> i, 2); // 2 instead of 1 because we texelGather in 2x2 blocks - int h = MAX(Height >> i, 2); - buffersize += w * h; - } - Pixels.resize(buffersize); - - // Base level: - memcpy(Pixels.data(), base_texture, Width * Height * 4); - - // Mipmap levels: - uint32_t *src = Pixels.data(); - uint32_t *dest = src + Width * Height; - for (int i = 1; i < Levels; i++) - { - int srch = MAX(Height >> (i - 1), 2); - int w = MAX(Width >> i, 2); - int h = MAX(Height >> i, 2); - - for (int x = 0; x < w; x++) - { - for (int y = 0; y < h; y++) - { - uint32_t src00 = src[y * 2 + x * 2 * srch]; - uint32_t src01 = src[y * 2 + 1 + x * 2 * srch]; - uint32_t src10 = src[y * 2 + (x * 2 + 1) * srch]; - uint32_t src11 = src[y * 2 + 1 + (x * 2 + 1) * srch]; - - uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; - uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; - uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; - uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; - - dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; - } - } - - src = dest; - dest += w * h; - } - } - - int Width = 0; - int Height = 0; - int Levels = 0; - std::vector Pixels; -}; - -class TextureMipmapper -{ -public: - static std::map> &Textures() - { - static std::map> textures; - return textures; - } -}; - -void R_SetMipmappedSpanSource(FTexture *tex) -{ - if (r_swtruecolor) - { - if (r_mipmap) - { - auto &mipmap = TextureMipmapper::Textures()[tex]; - if (!mipmap) - mipmap = std::make_shared(tex); - ds_source = (const BYTE*)mipmap->Pixels.data(); - } - else - { - ds_source = (const BYTE*)tex->GetPixelsBgra(); - } - } - else - { - ds_source = tex->GetPixels(); - } -} - -void R_ClearMipmapCache() -{ - TextureMipmapper::Textures().clear(); -} - void R_BeginDrawerCommands() { 
DrawerCommandQueue::Begin(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 37dc1a70a..4808cb257 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -108,9 +108,6 @@ void tmvline4_revsubclamp_rgba(); void R_FillColumnHoriz_rgba(); void R_FillSpan_rgba(); -void R_SetMipmappedSpanSource(FTexture *tex); -void R_ClearMipmapCache(); - ///////////////////////////////////////////////////////////////////////////// // Multithreaded rendering infrastructure: @@ -494,9 +491,9 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d uint32_t y = (yfrac - yhalf) >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; @@ -511,87 +508,81 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d return (alpha << 24) | (red << 16) | (green << 8) | blue; } -#ifndef NO_SSE -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t **col0, const uint32_t **col1, uint32_t texturefracx[4], uint32_t texturefracy[4], int ybits) -{ - uint32_t half = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t y = (texturefracy[i] - half) >> ybits; - - uint32_t inv_b = texturefracx[i]; - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, 
invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - } - return fg; +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t half = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t y = (texturefracy[i] - half) >> ybits; \ + \ + uint32_t inv_b = texturefracx[i]; \ + uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, 
_mm_setzero_si128()), 12)); \ + } \ } -FORCEINLINE __m128i sample_bilinear4_sse(const uint32_t *texture, dsfixed_t &xfrac, dsfixed_t &yfrac, dsfixed_t xstep, dsfixed_t ystep, int xbits, int ybits) -{ - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - - __m128i m127 = _mm_set1_epi16(127); - __m128i fg = _mm_setzero_si128(); - for (int i = 0; i < 4; i++) - { - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; - - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[(y + 1 & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[(y + 1 & ymask) + (((x + 1) & xmask) << yshift)]; - - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; - - uint32_t ab = a * b; - uint32_t invab = inv_a * b; - uint32_t ainvb = a * inv_b; - uint32_t invainvb = inv_a * inv_b; - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); - - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); - - __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); - __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); - - fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); - - xfrac += xstep; - yfrac += ystep; - } - return fg; +#define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ + 
int xshift = (32 - xbits); \ + int yshift = (32 - ybits); \ + int xmask = (1 << xshift) - 1; \ + int ymask = (1 << yshift) - 1; \ + uint32_t xhalf = 1 << (xbits - 1); \ + uint32_t yhalf = 1 << (ybits - 1); \ + \ + __m128i m127 = _mm_set1_epi16(127); \ + fg = _mm_setzero_si128(); \ + for (int i = 0; i < 4; i++) \ + { \ + uint32_t x = (xfrac - xhalf) >> xbits; \ + uint32_t y = (yfrac - yhalf) >> ybits; \ + \ + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ + uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ + \ + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t a = 16 - inv_a; \ + uint32_t b = 16 - inv_b; \ + \ + uint32_t ab = a * b; \ + uint32_t invab = inv_a * b; \ + uint32_t ainvb = a * inv_b; \ + uint32_t invainvb = inv_a * inv_b; \ + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ + \ + __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ + __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ + \ + fg = _mm_or_si128(_mm_srli_si128(fg, 4), _mm_slli_si128(_mm_packus_epi16(color, _mm_setzero_si128()), 12)); \ + \ + xfrac += xstep; \ + yfrac += ystep; \ + } \ } -#endif // Calculate constants for a simple shade with gamma correction #define AVX_LINEAR_SHADE_SIMPLE_INIT(light) \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 4002a5535..af761c6e7 100644 --- 
a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -280,7 +280,8 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -291,7 +292,8 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 26, 26); + __m128i fg; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, 26, 26); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -318,7 +320,10 @@ public: VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 -_xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE_SIMPLE(fg); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -329,7 +334,10 @@ public: VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { - __m128i fg = sample_bilinear4_sse(source, xfrac, yfrac, xstep, ystep, 32 - _xbits, 32 - _ybits); + __m128i fg; + int tmpx = 32 - _xbits; + int tmpy = 32 - _ybits; + VEC_SAMPLE_BILINEAR4_SPAN(fg, source, xfrac, yfrac, xstep, ystep, tmpx, tmpy); VEC_SHADE(fg, shade_constants); _mm_storeu_si128((__m128i*)dest, fg); dest += 4; @@ -471,7 +479,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -488,7 +497,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, 
shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -629,7 +639,8 @@ public: VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -648,7 +659,8 @@ public: VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); do { - __m128i fg = sample_bilinear4_sse(bufplce, bufplce2, buftexturefracx, local_vplce, bits); + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index c1e2d4bd0..c81d2a110 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -170,7 +170,6 @@ void FSoftwareRenderer::RenderView(player_t *player) R_InitColumnDrawers(); } - R_ClearMipmapCache(); R_BeginDrawerCommands(); R_RenderActorView (player->mo); // [RH] Let cameras draw onto textures that were visible this frame. 
diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index 3b5359846..f44b34d08 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -474,7 +474,7 @@ void FJPEGTexture::MakeTextureBgra() jpeg_decompress_struct cinfo; jpeg_error_mgr jerr; - PixelsBgra.resize(Width * Height, 0xffba0000); + CreatePixelsBgraWithMipmaps(); cinfo.err = jpeg_std_error(&jerr); cinfo.err->output_message = JPEG_OutputMessage; @@ -560,6 +560,8 @@ void FJPEGTexture::MakeTextureBgra() { delete[] buff; } + + GenerateBgraMipmaps(); } diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 408cf1e2f..ee4eabe90 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -633,7 +633,7 @@ void FPNGTexture::MakeTextureBgra () lump = new FileReader(SourceFile.GetChars()); } - PixelsBgra.resize(Width * Height, 0xffff0000); + CreatePixelsBgraWithMipmaps(); if (StartOfIDAT != 0) { DWORD len, id; @@ -757,6 +757,7 @@ void FPNGTexture::MakeTextureBgra () } } delete lump; + GenerateBgraMipmaps(); } //=========================================================================== diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 16a9e63a6..f5e4d4aa8 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -200,7 +200,7 @@ const uint32_t *FTexture::GetPixelsBgra() const BYTE *indices = GetPixels(); if (indices == nullptr) return nullptr; - PixelsBgra.resize(Width * Height); + CreatePixelsBgraWithMipmaps(); for (int i = 0; i < Width * Height; i++) { if (indices[i] != 0) @@ -208,6 +208,7 @@ const uint32_t *FTexture::GetPixelsBgra() else PixelsBgra[i] = 0; } + GenerateBgraMipmaps(); } return PixelsBgra.data(); } @@ -355,6 +356,71 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::CreatePixelsBgraWithMipmaps() +{ + int levels = MipmapLevels(); + int buffersize = 0; + for (int i = 0; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + 
buffersize += w * h; + } + PixelsBgra.resize(buffersize, 0xffff0000); +} + +int FTexture::MipmapLevels() const +{ + int widthbits = 0; + while ((Width >> widthbits) != 0) widthbits++; + + int heightbits = 0; + while ((Height >> heightbits) != 0) heightbits++; + + return MAX(widthbits, heightbits); +} + +void FTexture::GenerateBgraMipmaps() +{ + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width * Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = MIN((y + 1) * 2, srch - 1); + + uint32_t src00 = src[sy0 + sx0 * srch]; + uint32_t src01 = src[sy1 + sx0 * srch]; + uint32_t src10 = src[sy0 + sx1 * srch]; + uint32_t src11 = src[sy1 + sx1 * srch]; + + uint32_t alpha = (APART(src00) + APART(src01) + APART(src10) + APART(src11) + 2) / 4; + uint32_t red = (RPART(src00) + RPART(src01) + RPART(src10) + RPART(src11) + 2) / 4; + uint32_t green = (GPART(src00) + GPART(src01) + GPART(src10) + GPART(src11) + 2) / 4; + uint32_t blue = (BPART(src00) + BPART(src01) + BPART(src10) + BPART(src11) + 2) / 4; + + dest[y + x * h] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + src = dest; + dest += w * h; + } +} + void FTexture::CopyToBlock (BYTE *dest, int dwidth, int dheight, int xpos, int ypos, int rotate, const BYTE *translation) { const BYTE *pixels = GetPixels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index 3b4b0b8b3..ab9dc3719 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -271,6 +271,10 @@ protected: std::vector PixelsBgra; + void CreatePixelsBgraWithMipmaps(); + void GenerateBgraMipmaps(); + int MipmapLevels() const; + public: static void FlipSquareBlock (BYTE *block, int x, int 
y); static void FlipSquareBlockBgra (uint32_t *block, int x, int y); From 4142b6ed1b1dc858a4a7ab16ca2f01a79567ce3b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:03:34 +0200 Subject: [PATCH 068/100] GCC compile fix --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 4808cb257..617e83107 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -427,7 +427,7 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) return 0xff000000 | (red << 16) | (green << 8) | blue; } -inline bool span_sampler_setup(const uint32_t *&source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) +inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { if (!r_bilinear) return false; From f81042b3e20a2d9d300c0431d0bb094435eef340 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 21 Jun 2016 22:10:04 +0200 Subject: [PATCH 069/100] Fix warning generated by gcc --- src/r_draw_rgba.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8144c096d..57b32b28c 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1722,7 +1722,8 @@ public: // 64x64 is the most common case by far, so special case it. 
do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); @@ -1734,7 +1735,8 @@ public: int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest++ = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); + dest++; xfrac += xstep; yfrac += ystep; } while (--count); From e294906d692e4eee921d35e013fafdd633f42257 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:22:06 +0200 Subject: [PATCH 070/100] Voxel support in true color mode --- src/r_draw.cpp | 13 ++++ src/r_draw.h | 18 +++--- src/r_draw_rgba.cpp | 142 ++++++++++++++++++++++++++++++++++++++++++++ src/r_draw_rgba.h | 3 + src/r_things.cpp | 6 +- src/r_things.h | 2 +- 6 files changed, 171 insertions(+), 13 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 55353a006..8cca13289 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -100,6 +100,8 @@ void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); +void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); fixed_t (*tmvline1_addclamp)(); @@ -2306,6 +2308,9 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_rgba; R_DrawParticle = R_DrawParticle_rgba; + R_SetupDrawSlab = R_SetupDrawSlab_rgba; + R_DrawSlab = R_DrawSlab_rgba; + tmvline1_add = tmvline1_add_rgba; tmvline4_add = tmvline4_add_rgba; 
tmvline1_addclamp = tmvline1_addclamp_rgba; @@ -2403,6 +2408,14 @@ void R_InitColumnDrawers () R_MapColoredPlane = R_MapColoredPlane_C; R_DrawParticle = R_DrawParticle_C; +#ifdef X86_ASM + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabA; +#else + R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_DrawSlab = R_DrawSlabC; +#endif + tmvline1_add = tmvline1_add_C; tmvline4_add = tmvline4_add_C; tmvline1_addclamp = tmvline1_addclamp_C; diff --git a/src/r_draw.h b/src/r_draw.h index b662ddcee..547a044ea 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -286,16 +286,16 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -#ifdef X86_ASM -#define R_SetupDrawSlab R_SetupDrawSlabA -#define R_DrawSlab R_DrawSlabA -#else -#define R_SetupDrawSlab R_SetupDrawSlabC -#define R_DrawSlab R_DrawSlabC -#endif +extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); -extern "C" void R_SetupDrawSlab(const BYTE *colormap); -extern "C" void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#ifdef X86_ASM +extern "C" void R_SetupDrawSlabA(const BYTE *colormap); +extern "C" void R_DrawSlabA(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#else +extern "C" void R_SetupDrawSlabC(const BYTE *colormap); +extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); +#endif extern "C" int ds_y; extern "C" int ds_x1; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 57b32b28c..9603a8b3e 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2303,6 +2303,123 @@ public: } }; +class DrawSlabRGBACommand : public 
DrawerCommand +{ + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const BYTE *_vptr; + uint32_t *_p; + ShadeConstants _shade_constants; + const BYTE *_colormap; + fixed_t _light; + int _pitch; + int _start_y; + +public: + DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) + { + _dx = dx; + _v = v; + _dy = dy; + _vi = vi; + _vptr = vptr; + _p = (uint32_t *)p; + _shade_constants = shade_constants; + _colormap = colormap; + _light = light; + _pitch = dc_pitch; + _start_y = static_cast((p - dc_destorg) / (dc_pitch * 4)); + assert(dx > 0); + } + + void Execute(DrawerThread *thread) override + { + int dx = _dx; + fixed_t v = _v; + int dy = _dy; + fixed_t vi = _vi; + const BYTE *vptr = _vptr; + uint32_t *p = _p; + ShadeConstants shade_constants = _shade_constants; + const BYTE *colormap = _colormap; + uint32_t light = calc_light_multiplier(_light); + int pitch = _pitch; + int x; + + dy = thread->count_for_thread(_start_y, dy); + p = thread->dest_for_thread(_start_y, pitch, p); + v += vi * thread->skipped_by_thread(_start_y); + vi *= thread->num_cores; + pitch *= thread->num_cores; + + if (dx == 1) + { + while (dy > 0) + { + *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 2) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 3) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p += pitch; + v += vi; + dy--; + } + } + else if (dx == 4) + { + while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + p[0] = color; + p[1] = color; + p[2] = color; + p[3] = color; + p 
+= pitch; + v += vi; + dy--; + } + } + else while (dy > 0) + { + uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + // The optimizer will probably turn this into a memset call. + // Since dx is not likely to be large, I'm not sure that's a good thing, + // hence the alternatives above. + for (x = 0; x < dx; x++) + { + p[x] = color; + } + p += pitch; + v += vi; + dy--; + } + } +}; + class Vlinec1RGBACommand : public DrawerCommand { DWORD _iscale; @@ -3813,6 +3930,31 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +static ShadeConstants slab_rgba_shade_constants; +static const BYTE *slab_rgba_colormap; +static fixed_t slab_rgba_light; + +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade) +{ + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); +} + +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) +{ + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); +} + //extern FTexture *rw_pic; // For the asserts below DWORD vlinec1_rgba() diff --git 
a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 617e83107..c94cb1e4b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -90,6 +90,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); +void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); + void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); DWORD vlinec1_rgba(); diff --git a/src/r_things.cpp b/src/r_things.cpp index f6a1a709f..e1f1017f3 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -688,7 +688,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop // Render the voxel, either directly to the screen or offscreen. R_DrawVoxel(spr->pa.vpos, spr->pa.vang, spr->gpos, spr->Angle, - spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap->Maps + (spr->Style.ColormapNum << COLORMAPSHIFT), cliptop, clipbot, + spr->xscale, FLOAT2FIXED(spr->yscale), spr->voxel, spr->Style.BaseColormap, spr->Style.ColormapNum, cliptop, clipbot, minslabz, maxslabz, flags); // Blend the voxel, if that's what we need to do. 
@@ -2775,7 +2775,7 @@ extern double BaseYaspectMul;; void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 &dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; @@ -2812,7 +2812,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, sprcosang = FLOAT2FIXED(dasprang.Cos()) >> 2; sprsinang = FLOAT2FIXED(-dasprang.Sin()) >> 2; - R_SetupDrawSlab(colormap); + R_SetupDrawSlab(colormap, 0.0f, colormapnum << FRACBITS); int pixelsize = r_swtruecolor ? 4 : 1; diff --git a/src/r_things.h b/src/r_things.h index 04d5487ee..13f89574b 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, const FVector3 &sprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - lighttable_t *colormap, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); From db4cba239a16662c437da8dc5d03ce3f14dd151c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:27:12 +0200 Subject: [PATCH 071/100] Renamed member variable to make it compile with gcc --- src/r_draw_rgba.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 9603a8b3e..1e2678bd3 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2309,7 +2309,7 @@ class DrawSlabRGBACommand : public DrawerCommand fixed_t _v; int _dy; fixed_t _vi; - 
const BYTE *_vptr; + const BYTE *_voxelptr; uint32_t *_p; ShadeConstants _shade_constants; const BYTE *_colormap; @@ -2324,7 +2324,7 @@ public: _v = v; _dy = dy; _vi = vi; - _vptr = vptr; + _voxelptr = vptr; _p = (uint32_t *)p; _shade_constants = shade_constants; _colormap = colormap; @@ -2340,7 +2340,7 @@ public: fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; - const BYTE *vptr = _vptr; + const BYTE *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; From ca9d8e580e4b2b94c19182dcb160d9de6c904b5c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 00:51:16 +0200 Subject: [PATCH 072/100] Increase command queue memory pool to 16 MB and make it flush if its exhausted --- src/r_draw_rgba.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c94cb1e4b..47f7c8865 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -193,7 +193,7 @@ EXTERN_CVAR(Bool, r_mipmap) // Manages queueing up commands and executing them on worker threads class DrawerCommandQueue { - enum { memorypool_size = 4 * 1024 * 1024 }; + enum { memorypool_size = 16 * 1024 * 1024 }; char memorypool[memorypool_size]; size_t memorypool_pos = 0; @@ -241,8 +241,13 @@ public: else { void *ptr = AllocMemory(sizeof(T)); - if (!ptr) - return; + if (!ptr) // Out of memory - render what we got + { + queue->Finish(); + ptr = AllocMemory(sizeof(T)); + if (!ptr) + return; + } T *command = new (ptr)T(std::forward(args)...); queue->commands.push_back(command); } From 7a0c801a18bcf4f1910a1ae5cc708fe746ca2f9c Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Jun 2016 08:23:16 +0200 Subject: [PATCH 073/100] Added mipmapping to wallscan --- src/r_draw.h | 29 ------- src/r_draw_rgba.h | 2 +- src/r_segs.cpp | 207 +++++++++++++++++++++++++++++----------------- 3 files changed, 130 insertions(+), 108 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index 
547a044ea..72304e81f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -381,33 +381,4 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_bilinear); -// Texture sampler state needed for bilinear filtering -struct SamplerSetup -{ - SamplerSetup() { } - SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); - - const BYTE *source; - const BYTE *source2; - uint32_t texturefracx; -}; - -inline SamplerSetup::SamplerSetup(fixed_t xoffset, bool magnifying, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) -{ - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying) - { - source = getcol(texture, xoffset >> FRACBITS); - source2 = nullptr; - texturefracx = 0; - } - else - { - int tx = (xoffset - FRACUNIT / 2) >> FRACBITS; - source = getcol(texture, tx); - source2 = getcol(texture, tx + 1); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; - } -} - #endif diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 47f7c8865..8f97d4ecd 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -443,7 +443,7 @@ inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, in // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 3 + (1 << (FRACBITS -1)); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); if (magnitude >> FRACBITS == 0) return false; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index d71487bb9..84c967d1d 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1067,11 +1067,92 @@ void R_RenderFakeWallRange (drawseg_t *ds, int x1, int x2) return; } +EXTERN_CVAR(Bool, r_mipmap) + +struct WallscanSampler +{ + WallscanSampler() { } + WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)); + + uint32_t uv_pos; + uint32_t uv_step; + int32_t uv_fracbits; + uint32_t uv_max; + + const BYTE *source; + const BYTE *source2; + uint32_t texturefracx; +}; + +WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) +{ + int base_width = texture->GetWidth(); + int base_height = texture->GetHeight(); + uv_fracbits = 32 - texture->HeightBits; + uv_max = base_height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; + v = v - floor(v); + v *= base_height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + + bool magnifying = uv_step >> (uv_fracbits - 1) == 0; + + // Only do bilinear filtering if enabled and not a magnifying filter + if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + { + source = getcol(texture, xoffset >> FRACBITS); + source2 = nullptr; + texturefracx = 0; + } + else + { + int mipmap_offset = 0; + int mip_width = base_width; + int mip_height = base_height; + if (r_mipmap) + { + fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); + int level = magnitude >> FRACBITS; + while (level != 0) + { + if (uv_fracbits > 30) + break; + + mipmap_offset += mip_width * mip_height; + uv_fracbits += 1; + uv_pos >>= 1; + uv_step >>= 1; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } +} + // Draw a column with support for non-power-of-two ranges -uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv_step, uint32_t uv_max, const SamplerSetup &sampler, DWORD(*draw1column)()) +void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { int pixelsize = r_swtruecolor ? 
4 : 1; - if (uv_max == 0) // power of two + if (sampler.uv_max == 0) // power of two { int count = y2 - y1; @@ -1080,24 +1161,24 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; - dc_texturefrac = uv_start; + dc_iscale = sampler.uv_step; + dc_texturefrac = sampler.uv_pos; draw1column(); - uint64_t step64 = uv_step; - uint64_t pos64 = uv_start; - return (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } else { - uint32_t uv_pos = uv_start; + uint32_t uv_pos = sampler.uv_pos; uint32_t left = y2 - y1; while (left > 0) { - uint32_t available = uv_max - uv_pos; - uint32_t next_uv_wrap = available / uv_step; - if (available % uv_step != 0) + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) next_uv_wrap++; uint32_t count = MIN(left, next_uv_wrap); @@ -1106,25 +1187,25 @@ uint32_t wallscan_drawcol1(int x, int y1, int y2, uint32_t uv_start, uint32_t uv dc_texturefracx = sampler.texturefracx; dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; - dc_iscale = uv_step; + dc_iscale = sampler.uv_step; dc_texturefrac = uv_pos; draw1column(); left -= count; - uv_pos += uv_step * count; - if (uv_pos >= uv_max) - uv_pos -= uv_max; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; } - return uv_pos; + sampler.uv_pos = uv_pos; } } // Draw four columns with support for non-power-of-two ranges -void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_step, uint32_t uv_max, const SamplerSetup *sampler, void(*draw4columns)()) +void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { int pixelsize 
= r_swtruecolor ? 4 : 1; - if (uv_max == 0) // power of two, no wrap handling needed + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1132,12 +1213,12 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; - uint64_t step64 = uv_step[i]; - uint64_t pos64 = uv_pos[i]; - uv_pos[i] = (uint32_t)(pos64 + step64 * count); + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; dc_count = count; @@ -1160,9 +1241,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste uint32_t count = left; for (int i = 0; i < 4; i++) { - uint32_t available = uv_max - uv_pos[i]; - uint32_t next_uv_wrap = available / uv_step[i]; - if (available % uv_step[i] != 0) + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) next_uv_wrap++; count = MIN(next_uv_wrap, count); } @@ -1170,8 +1251,8 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Draw until that column wraps for (int i = 0; i < 4; i++) { - vplce[i] = uv_pos[i]; - vince[i] = uv_step[i]; + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; } dc_count = count; draw4columns(); @@ -1179,9 +1260,9 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste // Wrap the uv position for (int i = 0; i < 4; i++) { - uv_pos[i] += uv_step[i] * count; - if (uv_pos[i] >= uv_max) - uv_pos[i] -= uv_max; + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= 
sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; } left -= count; @@ -1189,22 +1270,6 @@ void wallscan_drawcol4(int x, int y1, int y2, uint32_t *uv_pos, uint32_t *uv_ste } } -// Calculates a wrapped uv start position value for a column -void calc_uv_start_and_step(int y1, float swal, double yrepeat, uint32_t uv_height, int fracbits, uint32_t &uv_start_out, uint32_t &uv_step_out) -{ - double uv_stepd = swal * yrepeat; - - // Find start uv in [0-uv_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / uv_height; - v = v - floor(v); - v *= uv_height; - v *= (1 << fracbits); - - uv_start_out = (uint32_t)v; - uv_step_out = xs_ToFixed(fracbits, uv_stepd); -} - typedef DWORD(*Draw1ColumnFuncPtr)(); typedef void(*Draw4ColumnsFuncPtr)(); @@ -1216,15 +1281,12 @@ void wallscan_any( if (rw_pic->UseType == FTexture::TEX_Null) return; - uint32_t uv_height = rw_pic->GetHeight(); - uint32_t fracbits = 32 - rw_pic->HeightBits; - uint32_t uv_max = uv_height << fracbits; + fixed_t xoffset = rw_offset; + rw_pic->GetHeight(); // To ensure that rw_pic->HeightBits has been set DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(fracbits, draw1column, draw4columns); - - fixed_t xoffset = rw_offset; + setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1261,11 +1323,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + 
wallscan_drawcol1(x, y1, y2, sampler, draw1column); } // The aligned columns @@ -1282,17 +1341,9 @@ void wallscan_any( light += rw_lightstep; } - uint32_t uv_pos[4], uv_step[4]; - int magnifying = 0; + WallscanSampler sampler[4]; for (int i = 0; i < 4; i++) - { - calc_uv_start_and_step(y1[i], swal[x + i], yrepeat, uv_height, fracbits, uv_pos[i], uv_step[i]); - magnifying |= uv_step[i] >> (fracbits - 1); - } - - SamplerSetup sampler[4]; - for (int i = 0; i < 4; i++) - sampler[i] = SamplerSetup(lwal[x + i] + xoffset, magnifying == 0, rw_pic, getcol); + sampler[i] = WallscanSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, rw_pic, getcol); // Figure out where we vertically can start and stop drawing 4 columns in one go int middle_y1 = y1[0]; @@ -1305,13 +1356,16 @@ void wallscan_any( // If we got an empty column in our set we cannot draw 4 columns in one go: bool empty_column_in_set = false; + int bilinear_count = 0; for (int i = 0; i < 4; i++) { if (y2[i] <= y1[i]) empty_column_in_set = true; + if (sampler[i].source2) + bilinear_count++; } - if (empty_column_in_set || middle_y2 <= middle_y1) + if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) { for (int i = 0; i < 4; i++) { @@ -1320,7 +1374,7 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, lights[i], wallshade); - wallscan_drawcol1(x + i, y1[i], y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], y2[i], sampler[i], draw1column); } continue; } @@ -1332,7 +1386,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (y1[i] < middle_y1) - uv_pos[i] = wallscan_drawcol1(x + i, y1[i], middle_y1, uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, y1[i], middle_y1, sampler[i], draw1column); } // Draw the area where all 4 columns are active @@ -1352,7 +1406,7 @@ void wallscan_any( } } } - wallscan_drawcol4(x, middle_y1, middle_y2, uv_pos, uv_step, 
uv_max, sampler, draw4columns); + wallscan_drawcol4(x, middle_y1, middle_y2, sampler, draw4columns); // Draw the last rows where not all 4 columns are active for (int i = 0; i < 4; i++) @@ -1361,7 +1415,7 @@ void wallscan_any( R_SetColorMapLight(basecolormap, lights[i], wallshade); if (middle_y2 < y2[i]) - uv_pos[i] = wallscan_drawcol1(x + i, middle_y2, y2[i], uv_pos[i], uv_step[i], uv_max, sampler[i], draw1column); + wallscan_drawcol1(x + i, middle_y2, y2[i], sampler[i], draw1column); } } @@ -1376,11 +1430,8 @@ void wallscan_any( if (!fixed) R_SetColorMapLight(basecolormap, light, wallshade); - uint32_t uv_start, uv_step; - calc_uv_start_and_step(y1, swal[x], yrepeat, uv_height, fracbits, uv_start, uv_step); - - SamplerSetup sampler(lwal[x] + xoffset, uv_step >> (fracbits - 1) == 0, rw_pic, getcol); - wallscan_drawcol1(x, y1, y2, uv_start, uv_step, uv_max, sampler, draw1column); + WallscanSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, rw_pic, getcol); + wallscan_drawcol1(x, y1, y2, sampler, draw1column); } NetUpdate (); From 698b5f3db19dd5b3331cae5a7a234c23192c310a Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 11:37:51 +0200 Subject: [PATCH 074/100] Simplify drawer code by creating loop iterators Fixed blending bug --- src/r_draw.cpp | 12 +- src/r_draw.h | 6 +- src/r_draw_rgba.cpp | 3338 +++++++++++----------------------------- src/r_draw_rgba.h | 426 ++--- src/r_draw_rgba_sse.h | 116 +- src/r_drawt_rgba.cpp | 70 +- src/r_drawt_rgba_sse.h | 12 +- src/r_plane.cpp | 4 +- src/r_segs.cpp | 16 +- src/r_things.cpp | 2 +- src/v_draw.cpp | 2 +- 11 files changed, 1235 insertions(+), 2769 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 8cca13289..578ca9646 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -1644,6 +1644,8 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; +uint32_t vlinemax; +uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ 
-1693,11 +1695,12 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits) +void setupvline (int fracbits, int fracmax) { if (r_swtruecolor) { vlinebits = fracbits; + vlinemax = fracmax; return; } @@ -1777,7 +1780,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits) +void setupmvline (int fracbits, int fracmax) { if (!r_swtruecolor) { @@ -1792,6 +1795,7 @@ void setupmvline (int fracbits) else { mvlinebits = fracbits; + mvlinemax = fracmax; } } @@ -1964,10 +1968,12 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; +uint32_t tmvlinemax; -void setuptmvline (int bits) +void setuptmvline (int bits, int fracmax) { tmvlinebits = bits; + tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 72304e81f..bd477efc4 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -100,13 +100,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); extern void (*dovline4) (); -extern void setupvline (int); +extern void setupvline (int,int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int); +extern void setupmvline (int,int); -extern void setuptmvline (int); +extern void setuptmvline (int,int); // The Spectre/Invisibility effect. 
extern void (*R_DrawFuzzColumn)(void); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 1e2678bd3..dc97fdd47 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -51,6 +51,9 @@ extern int vlinebits; extern int mvlinebits; extern int tmvlinebits; +extern uint32_t vlinemax; +extern uint32_t mvlinemax; +extern uint32_t tmvlinemax; extern "C" short spanend[MAXHEIGHT]; extern float rw_light; @@ -261,353 +264,520 @@ void DrawerCommandQueue::StopThreads() ///////////////////////////////////////////////////////////////////////////// -class DrawColumnRGBACommand : public DrawerCommand +class DrawerColumnCommand : public DrawerCommand { +public: int _count; BYTE * RESTRICT _dest; - DWORD _texturefrac; - DWORD _iscale; - fixed_t _light; - const BYTE * RESTRICT _source; int _pitch; + DWORD _iscale; + DWORD _texturefrac; + + DrawerColumnCommand() + { + _count = dc_count; + _dest = dc_dest; + _iscale = dc_iscale; + _texturefrac = dc_texturefrac; + _pitch = dc_pitch; + } + + class LoopIterator + { + public: + int count; + uint32_t *dest; + int pitch; + fixed_t fracstep; + fixed_t frac; + + LoopIterator(DrawerColumnCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + } + + uint32_t sample_index() + { + return frac >> FRACBITS; + } + + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + frac += fracstep; + return (--count) != 0; + } + }; +}; + +class DrawColumnRGBACommand : public DrawerColumnCommand +{ + uint32_t _light; + const BYTE * RESTRICT _source; ShadeConstants _shade_constants; BYTE * RESTRICT _colormap; public: 
DrawColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _texturefrac = dc_texturefrac; - _iscale = dc_iscale; - _light = dc_light; - _source = dc_source; - _pitch = dc_pitch; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + _source = dc_source; _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - // Zero length, column does not exceed a pixel. - if (count <= 0) - return; - - // Framebuffer destination address. - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - // Determine scaling, - // which is the only mapping to be done. - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - // [RH] Get local copies of these variables so that the compiler - // has a better chance of optimizing this well. 
- const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - BYTE *colormap = _colormap; - + LoopIterator loop(this, thread); + if (!loop) return; do { - *dest = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - dest += pitch; - frac += fracstep; - - } while (--count); + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } }; -class FillColumnRGBACommand : public DrawerCommand +class FillColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - fixed_t _light; - int _pitch; - int _color; + uint32_t _color; public: FillColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _light = dc_light; - _pitch = dc_pitch; - _color = dc_color; + uint32_t light = LightBgra::calc_light_multiplier(dc_light); + _color = LightBgra::shade_pal_index_simple(dc_color, light); } void Execute(DrawerThread *thread) override { - int count; - uint32_t* dest; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - uint32_t light = calc_light_multiplier(_light); - + LoopIterator loop(this, thread); + if (!loop) return; + do { - int pitch = _pitch * thread->num_cores; - uint32_t color = shade_pal_index_simple(_color, light); - - do - { - *dest = color; - dest += pitch; - } while (--count); - } + *loop.dest = BlendBgra::copy(_color); + } while (loop.next()); } }; -class FillAddColumnRGBACommand : public DrawerCommand +class FillAddColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; uint32_t _srccolor; public: FillAddColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _srccolor = dc_srccolor_bgra; } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + 
if (!loop) return; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = fg >> 24; - fg_alpha += fg_alpha >> 7; - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - uint32_t inv_alpha = 256 - fg_alpha; + uint32_t alpha = APART(_srccolor); + alpha += alpha >> 7; do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha); + } while (loop.next()); } }; -class FillAddClampColumnRGBACommand : public DrawerCommand +class FillAddClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillAddClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - 
uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillSubClampColumnRGBACommand : public DrawerCommand +class FillSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = 
_destalpha >> (FRACBITS - 8); - - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; - - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; -class FillRevSubClampColumnRGBACommand : public DrawerCommand +class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand { - int _count; - BYTE * RESTRICT _dest; - int _pitch; - int _color; uint32_t _srccolor; - fixed_t _srcalpha; - fixed_t _destalpha; + uint32_t _srcalpha; + uint32_t _destalpha; public: FillRevSubClampColumnRGBACommand() { - _count = dc_count; - _dest = dc_dest; - _pitch = dc_pitch; - _color = dc_color; _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + *loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; +class DrawAddColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _colormap; - dest = 
thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; +public: + DrawAddColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _colormap = dc_colormap; + } - uint32_t fg = _srccolor; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; - fg_red *= fg_alpha; - fg_green *= fg_alpha; - fg_blue *= fg_alpha; +class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; - do { - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; +public: + DrawTranslatedColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + } - uint32_t red = clamp((0x10000 + fg_red - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = 
LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - } while (--count); +class DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand +{ + fixed_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawTlatedAddColumnRGBACommand() + { + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _translation = dc_translation; + _source = dc_source; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawShadedColumnRGBACommand : public DrawerColumnCommand +{ +private: + const BYTE * RESTRICT _source; + lighttable_t * RESTRICT _colormap; + uint32_t _color; + +public: + DrawShadedColumnRGBACommand() + { + _source = dc_source; + _colormap = dc_colormap; + _color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light)); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = clamp(_colormap[_source[loop.sample_index()]], 0, 64) * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class DrawAddClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t 
_srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + BYTE * RESTRICT _translation; + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawAddClampTranslatedColumnRGBACommand() + { + _translation = dc_translation; + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void 
Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + +public: + DrawRevSubClampColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + 
+class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand +{ + const BYTE * RESTRICT _source; + uint32_t _light; + ShadeConstants _shade_constants; + uint32_t _srcalpha; + uint32_t _destalpha; + BYTE * RESTRICT _translation; + +public: + DrawRevSubClampTranslatedColumnRGBACommand() + { + _source = dc_source; + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + _translation = dc_translation; + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } }; @@ -635,19 +805,16 @@ public: void Execute(DrawerThread *thread) override { - int count; - uint32_t *dest; - int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); - count = thread->count_for_thread(yl, yh - yl + 1); + int count = thread->count_for_thread(yl, yh - yl + 1); // Zero length. 
if (count <= 0) return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); + uint32_t *dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + _x + (uint32_t*)_destorg); int pitch = _pitch * thread->num_cores; int fuzzstep = thread->num_cores; @@ -659,13 +826,10 @@ public: if (yl < fuzzstep) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -694,13 +858,10 @@ public: do { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; dest += pitch; @@ -714,783 +875,21 @@ public: if (lowerbounds) { uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = (bg) & 0xff; - uint32_t red = bg_red * 3 / 4; - uint32_t green = bg_green * 3 / 4; - uint32_t blue = bg_blue * 3 / 4; + uint32_t red = RPART(bg) * 3 / 4; + uint32_t green = GPART(bg) * 3 / 4; + uint32_t blue = BPART(bg) * 3 / 4; *dest = 0xff000000 | (red << 16) | (green << 8) | blue; } } }; -class DrawAddColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants 
_shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; +///////////////////////////////////////////////////////////////////////////// +class DrawerSpanCommand : public DrawerCommand +{ public: - DrawAddColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(colormap[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class 
DrawTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - -public: - DrawTranslatedColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t* dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - // [RH] Local copies of global vars to improve compiler optimizations - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - do - { - *dest = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawTlatedAddColumnRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawTlatedAddColumnRGBACommand() - { - _count = dc_count; - _light = dc_light; - _shade_constants = dc_shade_constants; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - 
_pitch = dc_pitch; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawShadedColumnRGBACommand : public DrawerCommand -{ -private: - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - fixed_t _light; - const BYTE * RESTRICT _source; - lighttable_t * RESTRICT _colormap; - int _color; - int _pitch; - -public: - DrawShadedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _light = dc_light; - _source = dc_source; - _colormap = dc_colormap; - 
_color = dc_color; - _pitch = dc_pitch; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac, fracstep; - - count = thread->count_for_thread(_dest_y, _count); - - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - { - const BYTE *source = _source; - BYTE *colormap = _colormap; - int pitch = _pitch * thread->num_cores; - - do - { - DWORD alpha = clamp(colormap[source[frac >> FRACBITS]], 0, 64); - DWORD inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - 
- count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawAddClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawAddClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _translation = dc_translation; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - 
int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawSubClampColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void 
Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - 
_shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE *translation = _translation; - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - DrawRevSubClampColumnRGBACommand() - { - _count = dc_count; - 
_dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - const BYTE *source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(source[frac >> FRACBITS], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerCommand -{ - int _count; - BYTE * RESTRICT _dest; - DWORD _iscale; - DWORD _texturefrac; - const BYTE * RESTRICT _source; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * 
RESTRICT _translation; - -public: - DrawRevSubClampTranslatedColumnRGBACommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = dc_source; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - int count; - uint32_t *dest; - fixed_t frac; - fixed_t fracstep; - - count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - - fracstep = _iscale * thread->num_cores; - frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - - { - BYTE * RESTRICT translation = _translation; - const BYTE * RESTRICT source = _source; - int pitch = _pitch * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do - { - uint32_t fg = shade_pal_index(translation[source[frac >> FRACBITS]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - frac += fracstep; - } while (--count); - } - } -}; - -class DrawSpanRGBACommand : public DrawerCommand -{ - const uint32_t * RESTRICT _source; 
fixed_t _xfrac; fixed_t _yfrac; fixed_t _xstep; @@ -1501,14 +900,17 @@ class DrawSpanRGBACommand : public DrawerCommand int _xbits; int _ybits; BYTE * RESTRICT _destorg; - fixed_t _light; + + const uint32_t * RESTRICT _source; + uint32_t _light; ShadeConstants _shade_constants; bool _magnifying; -public: - DrawSpanRGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerSpanCommand() { - _source = (const uint32_t*)ds_source; _xfrac = ds_xfrac; _yfrac = ds_yfrac; _xstep = ds_xstep; @@ -1519,752 +921,270 @@ public: _xbits = ds_xbits; _ybits = ds_ybits; _destorg = dc_destorg; - _light = ds_light; + + _source = (const uint32_t*)ds_source; + _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } + class LoopIterator + { + public: + uint32_t *dest; + int count; + dsfixed_t xfrac; + dsfixed_t yfrac; + dsfixed_t xstep; + dsfixed_t ystep; + BYTE yshift; + BYTE xshift; + int xmask; + bool is_64x64; + bool skipped; + + LoopIterator(DrawerSpanCommand *command, DrawerThread *thread) + { + dest = ylookup[command->_y] + command->_x1 + (uint32_t*)command->_destorg; + count = command->_x2 - command->_x1 + 1; + xfrac = command->_xfrac; + yfrac = command->_yfrac; + xstep = command->_xstep; + ystep = command->_ystep; + yshift = 32 - command->_ybits; + xshift = yshift - command->_xbits; + xmask = ((1 << command->_xbits) - 1) << command->_ybits; + is_64x64 = command->_xbits == 6 && command->_ybits == 6; + skipped = thread->line_skipped_by_thread(command->_y); + } + + // 64x64 is the most common case by far, so special case it. 
+ int spot64() + { + return ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); + } + + int spot() + { + return ((xfrac >> xshift) & xmask) + (yfrac >> yshift); + } + + explicit operator bool() + { + return !skipped && count > 0; + } + + bool next() + { + dest++; + xfrac += xstep; + yfrac += ystep; + return (--count) != 0; + } + }; +}; + +class DrawSpanRGBACommand : public DrawerSpanCommand +{ +public: void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - // Current texture index in u,v. - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; - do { - // Current texture index in u,v. - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); - - // Next step in u,v. 
- xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. - do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + } while (loop.next()); } else { do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); - xfrac += xstep; - yfrac += ystep; - } while (--count); + *loop.dest = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + } while (loop.next()); } } } }; -class DrawSpanMaskedRGBACommand : public DrawerCommand +class DrawSpanMaskedRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - bool _magnifying; - public: - DrawSpanMaskedRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; - - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* 
dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; + LoopIterator loop(this, thread); + if (!loop) return; if (_magnifying) { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - *dest = alpha_blend(shade_bgra(texdata, light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } else { - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 26, 26), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants), *dest); - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, loop.xfrac, loop.yfrac, 32 - _xbits, 32 - _ybits), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } } }; -class DrawSpanTranslucentRGBACommand : public DrawerCommand +class DrawSpanTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanTranslucentRGBACommand() - { - _source = (const uint32_t *)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - 
dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red 
* fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class DrawSpanMaskedTranslucentRGBACommand : public DrawerCommand +class DrawSpanMaskedTranslucentRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedTranslucentRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; 
- ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while 
(--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; -class DrawSpanAddClampRGBACommand : public DrawerCommand +class DrawSpanAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - - uint32_t fg = shade_bgra(source[spot], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest++ = 0xff000000 | (red << 16) | (green << 8) | blue; - - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); } } }; -class 
DrawSpanMaskedAddClampRGBACommand : public DrawerCommand +class DrawSpanMaskedAddClampRGBACommand : public DrawerSpanCommand { - const uint32_t * RESTRICT _source; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _xfrac; - fixed_t _yfrac; - BYTE * RESTRICT _destorg; - int _x1; - int _x2; - int _y1; - int _y; - fixed_t _xstep; - fixed_t _ystep; - int _xbits; - int _ybits; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - DrawSpanMaskedAddClampRGBACommand() - { - _source = (const uint32_t*)ds_source; - _light = ds_light; - _shade_constants = ds_shade_constants; - _xfrac = ds_xfrac; - _yfrac = ds_yfrac; - _destorg = dc_destorg; - _x1 = ds_x1; - _x2 = ds_x2; - _y = ds_y; - _xstep = ds_xstep; - _ystep = ds_ystep; - _xbits = ds_xbits; - _ybits = ds_ybits; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - void Execute(DrawerThread *thread) override { - if (thread->line_skipped_by_thread(_y)) - return; + LoopIterator loop(this, thread); + if (!loop) return; - dsfixed_t xfrac; - dsfixed_t yfrac; - dsfixed_t xstep; - dsfixed_t ystep; - uint32_t* dest; - const uint32_t* source = _source; - int count; - int spot; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - xfrac = _xfrac; - yfrac = _yfrac; - - dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; - - count = _x2 - _x1 + 1; - - xstep = _xstep; - ystep = _ystep; - - if (_xbits == 6 && _ybits == 6) + if (loop.is_64x64) { - // 64x64 is the most common case by far, so special case it. 
do { - uint32_t texdata; - - spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } else { - BYTE yshift = 32 - _ybits; - BYTE xshift = yshift - _xbits; - int xmask = ((1 << _xbits) - 1) << _ybits; do { - uint32_t texdata; - - spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); - texdata = source[spot]; - if (texdata != 0) - { - uint32_t fg = shade_bgra(texdata, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = (fg) & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * fg_alpha + bg_red * bg_alpha) / 256; - uint32_t green = (fg_green * fg_alpha + bg_green * bg_alpha) / 256; - uint32_t blue = (fg_blue * fg_alpha + bg_blue * bg_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - } - dest++; - xfrac += xstep; - yfrac += ystep; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.spot()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, 
*loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } } }; @@ -2296,13 +1216,15 @@ public: uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; int count = (_x2 - _x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for (int i = 0; i < count; i++) dest[i] = color; } }; +///////////////////////////////////////////////////////////////////////////// + class DrawSlabRGBACommand : public DrawerCommand { int _dx; @@ -2344,7 +1266,7 @@ public: uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; const BYTE *colormap = _colormap; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; @@ -2358,7 +1280,7 @@ public: { while (dy > 0) { - *p = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + *p = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p += pitch; v += vi; dy--; @@ -2368,7 +1290,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p += pitch; @@ -2380,7 +1302,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; p[2] = color; @@ -2393,7 +1315,7 @@ public: { while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); p[0] = color; p[1] = color; 
p[2] = color; @@ -2405,7 +1327,7 @@ public: } else while (dy > 0) { - uint32_t color = shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); + uint32_t color = LightBgra::shade_pal_index(colormap[vptr[v >> FRACBITS]], light, shade_constants); // The optimizer will probably turn this into a memset call. // Since dx is not likely to be large, I'm not sure that's a good thing, // hence the alternatives above. @@ -2420,1000 +1342,484 @@ public: } }; -class Vlinec1RGBACommand : public DrawerCommand +///////////////////////////////////////////////////////////////////////////// + +class DrawerWall1Command : public DrawerCommand { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; +public: BYTE * RESTRICT _dest; - int vlinebits; int _pitch; - fixed_t _light; + int _count; + DWORD _texturefrac; + uint32_t _texturefracx; + DWORD _iscale; + int _vlinebits; + uint32_t _vlinemax; + + const uint32 * RESTRICT _source; + const uint32 * RESTRICT _source2; + uint32_t _light; ShadeConstants _shade_constants; -public: - Vlinec1RGBACommand() + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall1Command(int vlinebits, uint32_t vlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; _dest = dc_dest; - vlinebits = ::vlinebits; _pitch = dc_pitch; - _light = dc_light; + _count = dc_count; + _texturefrac = dc_texturefrac; + _texturefracx = dc_texturefracx; + _iscale = dc_iscale; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + + _source = (const uint32 *)dc_source; + _source2 = (const uint32 *)dc_source2; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int 
pitch; + int count; + uint32_t fracstep; + uint32_t frac; + uint32_t texturefracx; + int bits; + + LoopIterator(DrawerWall1Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + fracstep = command->_iscale * thread->num_cores; + frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); + texturefracx = command->_texturefracx; + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + bits = command->_vlinebits; + pitch = command->_pitch * thread->num_cores; + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index() + { + return frac >> bits; + } + + bool next() + { + frac += fracstep; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class DrawerWall4Command : public DrawerCommand +{ +public: + BYTE * RESTRICT _dest; + int _count; + int _pitch; + int _vlinebits; + uint32_t _vlinemax; + ShadeConstants _shade_constants; + uint32_t _vplce[4]; + uint32_t _vince[4]; + uint32_t _buftexturefracx[4]; + const uint32_t * RESTRICT _bufplce[4]; + const uint32_t * RESTRICT _bufplce2[4]; + uint32_t _light[4]; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerWall4Command(int vlinebits, uint32_t vlinemax) + { + _dest = dc_dest; + _count = dc_count; + _pitch = dc_pitch; + _vlinebits = vlinebits; + _vlinemax = vlinemax; + _shade_constants = dc_shade_constants; + for (int i = 0; i < 4; i++) + { + _vplce[i] = vplce[i]; + _vince[i] = vince[i]; + _buftexturefracx[i] = buftexturefracx[i]; + _bufplce[i] = (const uint32_t *)bufplce[i]; + _bufplce2[i] = (const uint32_t *)bufplce2[i]; + _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); + } + + class LoopIterator + { + public: + uint32_t *dest; + int pitch; + int count; + int bits; + uint32_t vplce[4]; + uint32_t vince[4]; + + 
LoopIterator(DrawerWall4Command *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->_dest_y, command->_count); + if (count <= 0) + return; + + dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); + pitch = command->_pitch * thread->num_cores; + bits = command->_vlinebits; + + int skipped = thread->skipped_by_thread(command->_dest_y); + for (int i = 0; i < 4; i++) + { + vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; + vince[i] = command->_vince[i] * thread->num_cores; + } + } + + explicit operator bool() + { + return count > 0; + } + + int sample_index(int col) + { + return vplce[col] >> bits; + } + + bool next() + { + vplce[0] += vince[0]; + vplce[1] += vince[1]; + vplce[2] += vince[2]; + vplce[3] += vince[3]; + dest += pitch; + return (--count) != 0; + } + }; +}; + +class Vlinec1RGBACommand : public DrawerWall1Command +{ +public: + Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) + { } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - *dest = shade_bgra(source[frac >> bits], light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } 
while (loop.next()); } else { do { - *dest = shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); } } }; -class Vlinec4RGBACommand : public DrawerCommand +class Vlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int vlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32_t * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Vlinec4RGBACommand() + Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32_t *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = vlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { 
vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - dest[0] = shade_bgra(bufplce[0][(place = local_vplce[0]) >> bits], light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(bufplce[1][(place = local_vplce[1]) >> bits], light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(bufplce[2][(place = local_vplce[2]) >> bits], light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(bufplce[3][(place = local_vplce[3]) >> bits], light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } else { do { - dest[0] = shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants); local_vplce[0] = place + local_vince[0]; - dest[1] = shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants); local_vplce[1] = place + local_vince[1]; - dest[2] = shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, shade_constants); local_vplce[2] = place + local_vince[2]; - dest[3] = shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); } } }; -class Mvlinec1RGBACommand : public DrawerCommand +class Mvlinec1RGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - const BYTE * RESTRICT _source2; - uint32_t _texturefracx; - BYTE * RESTRICT _dest; - int mvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - public: - Mvlinec1RGBACommand() + Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _source2 = dc_source2; - _texturefracx = dc_texturefracx; - _dest = dc_dest; - mvlinebits = ::mvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - const uint32 *source2 = (const uint32 *)_source2; - uint32_t texturefracx = _texturefracx; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = mvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + LoopIterator loop(this, thread); + if (!loop) return; if (_source2 == nullptr) { do { - uint32_t pix = source[frac >> bits]; - *dest = alpha_blend(shade_bgra(pix, light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, 
_shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } else { do { - *dest = alpha_blend(shade_bgra(sample_bilinear(source, source2, texturefracx, frac, bits), light, shade_constants), *dest); - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); + } while (loop.next()); } } }; -class Mvlinec4RGBACommand : public DrawerCommand +class Mvlinec4RGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - int mvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - public: - Mvlinec4RGBACommand() + Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; + LoopIterator loop(this, thread); + if (!loop) return; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; - DWORD place; - - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = 
calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) + if (_bufplce2[0] == nullptr) { do { - uint32_t pix; - pix = bufplce[0][(place = local_vplce[0]) >> bits]; dest[0] = alpha_blend(shade_bgra(pix, light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - pix = bufplce[1][(place = local_vplce[1]) >> bits]; dest[1] = alpha_blend(shade_bgra(pix, light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - pix = bufplce[2][(place = local_vplce[2]) >> bits]; dest[2] = alpha_blend(shade_bgra(pix, light2, shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - pix = bufplce[3][(place = local_vplce[3]) >> bits]; dest[3] = alpha_blend(shade_bgra(pix, light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } else { do { - dest[0] = alpha_blend(shade_bgra(sample_bilinear(bufplce[0], bufplce2[0], buftexturefracx[0], place = local_vplce[0], bits), light0, shade_constants), dest[0]); local_vplce[0] = place + local_vince[0]; - dest[1] = alpha_blend(shade_bgra(sample_bilinear(bufplce[1], bufplce2[1], buftexturefracx[1], place = local_vplce[1], bits), light1, shade_constants), dest[1]); local_vplce[1] = place + local_vince[1]; - dest[2] = alpha_blend(shade_bgra(sample_bilinear(bufplce[2], bufplce2[2], buftexturefracx[2], place = local_vplce[2], bits), light2, 
shade_constants), dest[2]); local_vplce[2] = place + local_vince[2]; - dest[3] = alpha_blend(shade_bgra(sample_bilinear(bufplce[3], bufplce2[3], buftexturefracx[3], place = local_vplce[3], bits), light3, shade_constants), dest[3]); local_vplce[3] = place + local_vince[3]; - dest += pitch; - } while (--count); + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } while (loop.next()); } } }; -class Tmvline1AddRGBACommand : public DrawerCommand +class Tmvline1AddRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddRGBACommand() + Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - 
uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddRGBACommand : public DrawerCommand +class Tmvline4AddRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - public: - Tmvline4AddRGBACommand() + Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread 
*thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + 
loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1AddClampRGBACommand : public DrawerCommand +class Tmvline1AddClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1AddClampRGBACommand() + Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = 
(*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4AddClampRGBACommand : public DrawerCommand +class Tmvline4AddClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4AddClampRGBACommand() + Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = 
calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1SubClampRGBACommand : public DrawerCommand +class Tmvline1SubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT 
_dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1SubClampRGBACommand() + Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 
256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4SubClampRGBACommand : public DrawerCommand +class Tmvline4SubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4SubClampRGBACommand() + Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD 
local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; -class Tmvline1RevSubClampRGBACommand : public DrawerCommand +class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { - DWORD _iscale; - DWORD _texturefrac; - int _count; - const BYTE * RESTRICT _source; - BYTE * RESTRICT _dest; - int tmvlinebits; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - public: - Tmvline1RevSubClampRGBACommand() + Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) { 
- _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _source = dc_source; - _dest = dc_dest; - tmvlinebits = ::tmvlinebits; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - DWORD fracstep = _iscale * thread->num_cores; - DWORD frac = _texturefrac + _iscale * thread->skipped_by_thread(_dest_y); - const uint32 *source = (const uint32 *)_source; - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = tmvlinebits; - int pitch = _pitch * thread->num_cores; - - uint32_t light = calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - + LoopIterator loop(this, thread); + if (!loop) return; do { - uint32_t pix = source[frac >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - frac += fracstep; - dest += pitch; - } while (--count); + uint32_t fg = LightBgra::shade_bgra(_source[loop.sample_index()], _light, _shade_constants); + *loop.dest = 
BlendBgra::revsub(fg, *loop.dest, _srcalpha, calc_blend_bgalpha(fg, _destalpha)); + } while (loop.next()); } }; -class Tmvline4RevSubClampRGBACommand : public DrawerCommand +class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - int tmvlinebits; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - public: - Tmvline4RevSubClampRGBACommand() + Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - } } void Execute(DrawerThread *thread) override { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; - - uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - + LoopIterator 
loop(this, thread); + if (!loop) return; do { - for (int i = 0; i < 4; ++i) + for (int i = 0; i < 4; i++) { - uint32_t pix = bufplce[i][local_vplce[i] >> bits]; - - uint32_t fg_alpha = src_alpha; - uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha); - - uint32_t fg = shade_bgra(pix, light[i], shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - - local_vplce[i] += local_vince[i]; + uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); } - dest += pitch; - } while (--count); + } while (loop.next()); } }; +///////////////////////////////////////////////////////////////////////////// + class DrawFogBoundaryLineRGBACommand : public DrawerCommand { int _y; @@ -3446,7 +1852,7 @@ public: uint32_t *dest = ylookup[y] + (uint32_t*)_destorg; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants constants = _shade_constants; do @@ -3563,8 +1969,8 @@ public: uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; int count = (x2 - x1 + 1); - uint32_t light = calc_light_multiplier(_light); - uint32_t color = shade_pal_index_simple(_color, light); + uint32_t light = LightBgra::calc_light_multiplier(_light); + uint32_t color = LightBgra::shade_pal_index_simple(_color, light); for 
(int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 8f97d4ecd..20fff4fc0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,7 +286,7 @@ public: }; ///////////////////////////////////////////////////////////////////////////// -// Pixel shading macros and inline functions: +// Pixel shading inline functions: // Give the compiler a strong hint we want these functions inlined: #ifndef FORCEINLINE @@ -310,220 +310,256 @@ public: #endif #endif -// calculates the light constant passed to the shade_pal_index function -FORCEINLINE uint32_t calc_light_multiplier(dsfixed_t light) +class LightBgra { - return 256 - (light >> (FRACBITS - 8)); -} - -// Calculates a ARGB8 color for the given palette index and light multiplier -FORCEINLINE uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light) -{ - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -// Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap -FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) -{ - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t alpha = color.d & 0xff000000; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) +public: + // calculates the light constant passed to the shade_pal_index function + FORCEINLINE 
static uint32_t calc_light_multiplier(dsfixed_t light) { + return 256 - (light >> (FRACBITS - 8)); + } + + // Calculates a ARGB8 color for the given palette index and light multiplier + FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + { + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap + FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) -{ - uint32_t alpha = color & 0xff000000; - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) - { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; - } - else - { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; - - 
uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; - - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; - - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; - } - return alpha | (red << 16) | (green << 8) | blue; -} - -FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) -{ - uint32_t fg_alpha = fg >> 24; - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t alpha = fg_alpha + (fg_alpha >> 7); // 255 -> 256 - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (bg >> 16) & 0xff; - uint32_t bg_green = (bg >> 8) & 0xff; - uint32_t bg_blue = bg & 0xff; - - uint32_t red = clamp(fg_red + (bg_red * inv_alpha) / 256, 0, 255); - uint32_t green = clamp(fg_green + (bg_green * inv_alpha) / 256, 0, 255); - uint32_t blue = clamp(fg_blue + (bg_blue * inv_alpha) / 256, 0, 255); - - return 0xff000000 | (red << 16) | (green << 8) | blue; -} - -inline bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) -{ - if (!r_bilinear) - return false; - - // Is this a magfilter or minfilter? 
- fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); - fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); - fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS -1)); - if (magnitude >> FRACBITS == 0) - return false; - - if (r_mipmap) - { - int level = magnitude >> (FRACBITS + 1); - while (level != 0) + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) { - if (xbits <= 2 || ybits <= 2) - break; - - source += (1 << (xbits)) * (1 << (ybits)); - xbits -= 1; - ybits -= 1; - level >>= 1; + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; } - return true; -} -FORCEINLINE uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits) + FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) + { + uint32_t red = RPART(color) * light / 256; + uint32_t green = GPART(color) * light / 256; + uint32_t blue = BPART(color) * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t 
shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) + { + uint32_t alpha = color & 0xff000000; + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; + + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; + + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; + + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; + } +}; + +class BlendBgra { - uint32_t half = 1 << (ybits - 1); - uint32_t y = (texturefracy - half) >> ybits; +public: + FORCEINLINE static uint32_t copy(uint32_t fg) + { + return fg; + } - uint32_t p00 = col0[y]; - uint32_t p01 = col0[y + 1]; - uint32_t p10 = col1[y]; - uint32_t p11 = col1[y + 1]; + FORCEINLINE static uint32_t add(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = MIN((RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 255); + uint32_t green = MIN((GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 255); + uint32_t blue = MIN((BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + FORCEINLINE static uint32_t 
sub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 - RPART(fg) * srcalpha + RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 - GPART(fg) * srcalpha + GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 - BPART(fg) * srcalpha + BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + FORCEINLINE static uint32_t revsub(uint32_t fg, uint32_t bg, uint32_t srcalpha, uint32_t destalpha) + { + uint32_t red = clamp((0x10000 + RPART(fg) * srcalpha - RPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t green = clamp((0x10000 + GPART(fg) * srcalpha - GPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + uint32_t blue = clamp((0x10000 + BPART(fg) * srcalpha - BPART(bg) * destalpha) >> 8, 256, 256 + 255) - 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t alpha_blend(uint32_t fg, uint32_t bg) + { + uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 + uint32_t inv_alpha = 256 - alpha; + uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + return 0xff000000 | (red << 16) | (green << 8) | blue; + } +}; 
-FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) +class SampleBgra { - int xshift = (32 - xbits); - int yshift = (32 - ybits); - int xmask = (1 << xshift) - 1; - int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; +public: + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + { + if (!r_bilinear) + return false; - uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; - uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; - uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + // Is this a magfilter or minfilter? + fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); + fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); + fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); + if (magnitude >> FRACBITS == 0) + return false; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; - uint32_t a = 16 - inv_a; - uint32_t b = 16 - inv_b; + if (r_mipmap) + { + int level = magnitude >> (FRACBITS + 1); + while (level != 0) + { + if (xbits <= 2 || ybits <= 2) + break; - uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; - uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + source 
+= (1 << (xbits)) * (1 << (ybits)); + xbits -= 1; + ybits -= 1; + level >>= 1; + } + } + return true; + } - return (alpha << 24) | (red << 16) | (green << 8) | blue; -} + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + { + uint32_t half = 1 << (ybits - 1); + uint32_t y0 = (texturefracy - half) >> ybits; + if (y0 > ymax) + y0 = 0; + uint32_t y1 = y0 + 1; + if (y1 > ymax) + y1 = 0; -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits) { \ + uint32_t p00 = col0[y0]; + uint32_t p01 = col0[y1]; + uint32_t p10 = col1[y0]; + uint32_t p11 = col1[y1]; + + uint32_t inv_b = texturefracx; + uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } + + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, dsfixed_t yfrac, int xbits, int ybits) + { + int xshift = (32 - xbits); + int yshift = (32 - ybits); + int xmask = (1 << xshift) - 1; + int ymask = (1 << yshift) - 1; + uint32_t xhalf = 1 << (xbits - 1); + uint32_t yhalf = 1 << (ybits - 1); + uint32_t x = (xfrac - xhalf) >> xbits; + uint32_t y = (yfrac - yhalf) >> ybits; + + uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; + uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; + 
uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; + uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; + + uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; + uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t a = 16 - inv_a; + uint32_t b = 16 - inv_b; + + uint32_t red = (RPART(p00) * a * b + RPART(p01) * inv_a * b + RPART(p10) * a * inv_b + RPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t green = (GPART(p00) * a * b + GPART(p01) * inv_a * b + GPART(p10) * a * inv_b + GPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t blue = (BPART(p00) * a * b + BPART(p01) * inv_a * b + BPART(p10) * a * inv_b + BPART(p11) * inv_a * inv_b + 127) >> 8; + uint32_t alpha = (APART(p00) * a * b + APART(p01) * inv_a * b + APART(p10) * a * inv_b + APART(p11) * inv_a * inv_b + 127) >> 8; + + return (alpha << 24) | (red << 16) | (green << 8) | blue; + } +}; + +///////////////////////////////////////////////////////////////////////////// +// SSE/AVX shading macros: + +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ uint32_t half = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y = (texturefracy[i] - half) >> ybits; \ + uint32_t y0 = (texturefracy[i] - half) >> ybits; \ + if (y0 > ymax) y0 = 0; \ + uint32_t y1 = y0 + 1; \ + if (y1 > ymax) y1 = 0; \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ @@ -537,8 +573,8 @@ FORCEINLINE uint32_t sample_bilinear(const uint32_t *texture, dsfixed_t xfrac, d __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col0[i] + y)), _mm_setzero_si128()); \ - __m128i p1 = 
_mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(col1[i] + y)), _mm_setzero_si128()); \ + __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ + __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -758,12 +794,16 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) { uint32_t alpha = fg >> 24; alpha += alpha >> 7; - return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8; + uint32_t inv_alpha = 256 - alpha; + return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha); + __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i m255 = _mm_set1_epi16(255); \ + __m128i m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -772,8 +812,10 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ - bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \ - bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ 
\ + bg_alpha_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_hi), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_hi))), m128), 8); \ + bg_alpha_hi = _mm_add_epi16(bg_alpha_hi, _mm_srli_epi16(bg_alpha_hi, 7)); \ + bg_alpha_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_mullo_epi16(mdest_alpha, alpha_lo), _mm_mullo_epi16(m255, _mm_sub_epi16(m256, alpha_lo))), m128), 8); \ + bg_alpha_lo = _mm_add_epi16(bg_alpha_lo, _mm_srli_epi16(bg_alpha_lo, 7)); \ fg_alpha_hi = msrc_alpha; \ fg_alpha_lo = msrc_alpha; \ } diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index af761c6e7..408a2f5a2 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -70,7 +70,7 @@ public: xstep = _xstep; ystep = _ystep; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; if (_magnifying) @@ -166,7 +166,7 @@ public: spot = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6)); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. xfrac += xstep; @@ -258,7 +258,7 @@ public: spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift); // Lookup pixel from flat texture tile - *dest++ = shade_bgra(source[spot], light, shade_constants); + *dest++ = LightBgra::shade_bgra(source[spot], light, shade_constants); // Next step in u,v. 
xfrac += xstep; @@ -305,7 +305,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 26, 26), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -349,7 +349,7 @@ public: do { - *dest++ = shade_bgra(sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); + *dest++ = LightBgra::shade_bgra(SampleBgra::sample_bilinear(source, xfrac, yfrac, 32 - _xbits, 32 - _ybits), light, shade_constants); xfrac += xstep; yfrac += ystep; } while (--count); @@ -364,7 +364,8 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int vlinebits; + int _vlinebits; + uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -379,7 +380,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - vlinebits = ::vlinebits; + _vlinebits = vlinebits; + _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -398,13 +400,13 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int bits = vlinebits; + int bits = _vlinebits; int pitch = _pitch * thread->num_cores; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -480,7 +482,7 @@ public: do { __m128i fg; - 
VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -498,7 +500,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -520,7 +522,8 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int mvlinebits; + int _mvlinebits; + uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -535,7 +538,8 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - mvlinebits = ::mvlinebits; + _mvlinebits = mvlinebits; + _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -555,12 +559,12 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = mvlinebits; + int bits = _mvlinebits; - uint32_t light0 = calc_light_multiplier(palookuplight[0]); - uint32_t light1 = calc_light_multiplier(palookuplight[1]); - uint32_t light2 = calc_light_multiplier(palookuplight[2]); - uint32_t light3 = calc_light_multiplier(palookuplight[3]); + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); + uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); + uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); + uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -640,7 +644,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, 
bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -660,7 +664,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -686,7 +690,8 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -701,7 +706,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -719,13 +725,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -825,7 +831,8 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; 
fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -840,7 +847,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -858,13 +866,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -963,7 +971,8 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -978,7 +987,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -996,13 +1006,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = 
calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; @@ -1101,7 +1111,8 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int tmvlinebits; + int _tmvlinebits; + uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; @@ -1116,7 +1127,8 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - tmvlinebits = ::tmvlinebits; + _tmvlinebits = tmvlinebits; + _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -1134,13 +1146,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = tmvlinebits; + int bits = _tmvlinebits; uint32_t light[4]; - light[0] = calc_light_multiplier(palookuplight[0]); - light[1] = calc_light_multiplier(palookuplight[1]); - light[2] = calc_light_multiplier(palookuplight[2]); - light[3] = calc_light_multiplier(palookuplight[3]); + light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); + light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); + light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); + light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index e239674e8..c39fdc287 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -185,7 +185,7 @@ public: if 
(count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -196,7 +196,7 @@ public: BYTE *colormap = _colormap; if (count & 1) { - *dest = shade_pal_index(colormap[*source], light, shade_constants); + *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); source += sincr; dest += pitch; } @@ -204,8 +204,8 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -249,7 +249,7 @@ public: if (count <= 0) return; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -260,10 +260,10 @@ public: BYTE *colormap = _colormap; if (count & 1) { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); source += sincr; dest += pitch; 
} @@ -271,14 +271,14 @@ public: return; do { - dest[0] = shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); + dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); + dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); + dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); + dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); + dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); + dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); + dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); + dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); source += sincr * 2; dest += pitch * 2; } while (--count); @@ -453,7 +453,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -461,7 +461,7 @@ public: uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(colormap[*source], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[*source], 
light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -528,7 +528,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; BYTE *colormap = _colormap; @@ -538,7 +538,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(colormap[source[i]], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -606,7 +606,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -674,7 +674,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t fg = shade_pal_index_simple(_color, calc_light_multiplier(_light)); + uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -747,14 +747,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, 
shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -818,7 +818,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -827,7 +827,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -894,14 +894,14 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -965,7 +965,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -974,7 +974,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1042,14 +1042,14 @@ 
public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); do { - uint32_t fg = shade_pal_index(*source, light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; @@ -1113,7 +1113,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -1122,7 +1122,7 @@ public: do { for (int i = 0; i < 4; i++) { - uint32_t fg = shade_pal_index(source[i], light, shade_constants); + uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 684be2b6a..64a77e288 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -48,7 +48,7 @@ public: return; ShadeConstants shade_constants = _shade_constants; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); @@ -207,7 +207,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; BYTE *colormap = _colormap; @@ -335,7 
+335,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(shade_pal_index_simple(_color, calc_light_multiplier(_light))), _mm_setzero_si128()); + __m128i fg = _mm_unpackhi_epi8(_mm_set1_epi32(LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light))), _mm_setzero_si128()); __m128i alpha_one = _mm_set1_epi16(64); do { @@ -411,7 +411,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); @@ -538,7 +538,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; @@ -664,7 +664,7 @@ public: pitch = _pitch * thread->num_cores; sincr = 4 * thread->num_cores; - uint32_t light = calc_light_multiplier(_light); + uint32_t light = LightBgra::calc_light_multiplier(_light); uint32_t *palette = (uint32_t*)GPalette.BaseColors; ShadeConstants shade_constants = _shade_constants; diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 6913db918..0ede451e0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -514,8 +514,8 @@ void R_MapColoredPlane_rgba(int y, int x1) { uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; int count = (spanend[y] - x1 + 1); - uint32_t light = calc_light_multiplier(ds_light); - uint32_t color = shade_pal_index_simple(ds_color, light); + uint32_t light = LightBgra::calc_light_multiplier(ds_light); + uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); for (int i = 0; i < count; i++) dest[i] = color; } diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 84c967d1d..95dd287aa 
100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1276,7 +1276,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits,Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1286,7 +1286,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, draw1column, draw4columns); + setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1439,9 +1439,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits); + setupvline(bits, fracmax); line1 = dovline1; line4 = dovline4; }); @@ -1455,9 +1455,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits); + setupmvline(bits, fracmax); line1 = domvline1; line4 = domvline4; }); @@ -1475,9 +1475,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits); + setuptmvline(bits, fracmax); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/r_things.cpp b/src/r_things.cpp index e1f1017f3..74707ff72 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -2732,7 +2732,7 @@ void R_DrawParticle_rgba(vissprite_t *vis) DrawerCommandQueue::WaitForWorkers(); - uint32_t fg = shade_pal_index_simple(color, calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); + uint32_t fg = LightBgra::shade_pal_index_simple(color, LightBgra::calc_light_multiplier(LIGHTSCALE(0, vis->Style.ColormapNum << FRACBITS))); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index fd12a1587..0fb433343 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -1026,7 +1026,7 @@ void DCanvas::PUTTRANSDOT (int xx, int yy, int basecolor, int level) { uint32_t *spot = (uint32_t*)GetBuffer() + oldyyshifted + xx; - uint32_t fg = shade_pal_index_simple(basecolor, calc_light_multiplier(0)); + uint32_t fg = LightBgra::shade_pal_index_simple(basecolor, LightBgra::calc_light_multiplier(0)); uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_blue = fg & 0xff; From b7f32d1bfce120c6372d3ca453aaaba755f8207f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 18:05:32 +0200 Subject: [PATCH 075/100] Added LoopIterator to the drawt family of drawers --- src/r_drawt_rgba.cpp | 1193 ++++++++++++------------------------------ 1 file changed, 330 insertions(+), 863 deletions(-) diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index c39fdc287..82932b1f2 100644 --- 
a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -84,8 +84,9 @@ extern unsigned int *horizspan[4]; ///////////////////////////////////////////////////////////////////////////// -class RtCopy1colRGBACommand : public DrawerCommand +class DrawerRt1colCommand : public DrawerCommand { +public: int hx; int sx; int yl; @@ -93,8 +94,14 @@ class RtCopy1colRGBACommand : public DrawerCommand BYTE * RESTRICT _destorg; int _pitch; -public: - RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) + uint32_t _light; + ShadeConstants _shade_constants; + BYTE * RESTRICT _colormap; + + uint32_t _srcalpha; + uint32_t _destalpha; + + DrawerRt1colCommand(int hx, int sx, int yl, int yh) { this->hx = hx; this->sx = sx; @@ -103,185 +110,384 @@ public: _destorg = dc_destorg; _pitch = dc_pitch; + + _light = LightBgra::calc_light_multiplier(dc_light); + _shade_constants = dc_shade_constants; + _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch, sincr; - count = thread->count_for_thread(yl, (yh - yl + 1)); - if (count <= 0) - return; + LoopIterator(DrawerRt1colCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, (command->yh - command->yl + 1)); + if (count <= 0) + return; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4 + command->hx] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores * 4; + } - if (count & 1) { - 
*dest = GPalette.BaseColors[*source]; - source += sincr; + explicit operator bool() + { + return count > 0; + } + + bool next() + { dest += pitch; + source += sincr; + return (--count) != 0; } - if (count & 2) { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - source += sincr * 2; - dest += pitch * 2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = GPalette.BaseColors[source[0]]; - dest[pitch] = GPalette.BaseColors[source[sincr]]; - dest[pitch * 2] = GPalette.BaseColors[source[sincr * 2]]; - dest[pitch * 3] = GPalette.BaseColors[source[sincr * 3]]; - source += sincr * 4; - dest += pitch * 4; - } while (--count); - } + }; }; -class RtMap1colRGBACommand : public DrawerCommand +class DrawerRt4colsCommand : public DrawerCommand { - int hx; +public: int sx; int yl; int yh; - fixed_t _light; + uint32_t _light; ShadeConstants _shade_constants; BYTE * RESTRICT _destorg; int _pitch; BYTE * RESTRICT _colormap; + uint32_t _srcalpha; + uint32_t _destalpha; -public: - RtMap1colRGBACommand(int hx, int sx, int yl, int yh) + DrawerRt4colsCommand(int sx, int yl, int yh) { - this->hx = hx; this->sx = sx; this->yl = yl; this->yh = yh; - _light = dc_light; + _light = LightBgra::calc_light_multiplier(dc_light); _shade_constants = dc_shade_constants; _destorg = dc_destorg; _pitch = dc_pitch; _colormap = dc_colormap; + + _srcalpha = dc_srcalpha >> (FRACBITS - 8); + _destalpha = dc_destalpha >> (FRACBITS - 8); } - void Execute(DrawerThread *thread) override + class LoopIterator { + public: uint32_t *source; uint32_t *dest; int count; int pitch; int sincr; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; + LoopIterator(DrawerRt4colsCommand *command, DrawerThread *thread) + { + count = thread->count_for_thread(command->yl, command->yh - command->yl + 1); + if (count <= 0) + return; - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - dest 
= thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; - - if (count & 1) { - *dest = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - source += sincr; - dest += pitch; + dest = thread->dest_for_thread(command->yl, command->_pitch, ylookup[command->yl] + command->sx + (uint32_t*)command->_destorg); + source = &thread->dc_temp_rgba[command->yl * 4] + thread->skipped_by_thread(command->yl) * 4; + pitch = command->_pitch * thread->num_cores; + sincr = thread->num_cores * 4; } - if (!(count >>= 1)) - return; - do { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); - } + explicit operator bool() + { + return count > 0; + } + + bool next() + { + dest += pitch; + source += sincr; + return (--count) != 0; + } + }; }; -class RtMap4colsRGBACommand : public DrawerCommand +class RtCopy1colRGBACommand : public DrawerRt1colCommand { - int sx; - int yl; - int yh; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _destorg; - int _pitch; - BYTE * RESTRICT _colormap; - public: - RtMap4colsRGBACommand(int sx, int yl, int yh) + RtCopy1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _light = dc_light; - _shade_constants = dc_shade_constants; - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; } void Execute(DrawerThread *thread) override { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = GPalette.BaseColors[*loop.source]; + 
*loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; +class RtMap1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtMap1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::copy(fg); + } while (loop.next()); + } +}; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = thread->num_cores * 4; - - BYTE *colormap = _colormap; +class RtMap4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtMap4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } - if (count & 1) { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - source += sincr; - dest += pitch; - } - if (!(count >>= 1)) - return; + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } while (loop.next()); + } +}; - do { - dest[0] = LightBgra::shade_pal_index(colormap[source[0]], light, shade_constants); - dest[1] = 
LightBgra::shade_pal_index(colormap[source[1]], light, shade_constants); - dest[2] = LightBgra::shade_pal_index(colormap[source[2]], light, shade_constants); - dest[3] = LightBgra::shade_pal_index(colormap[source[3]], light, shade_constants); - dest[pitch] = LightBgra::shade_pal_index(colormap[source[sincr]], light, shade_constants); - dest[pitch + 1] = LightBgra::shade_pal_index(colormap[source[sincr + 1]], light, shade_constants); - dest[pitch + 2] = LightBgra::shade_pal_index(colormap[source[sincr + 2]], light, shade_constants); - dest[pitch + 3] = LightBgra::shade_pal_index(colormap[source[sincr + 3]], light, shade_constants); - source += sincr * 2; - dest += pitch * 2; - } while (--count); +class RtAdd1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[*loop.source], _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAdd4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAdd4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(_colormap[loop.source[i]], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtShaded1colRGBACommand : public DrawerRt1colCommand +{ + uint32_t _color; + +public: + RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void 
Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t alpha = _colormap[*loop.source] * 4; + uint32_t inv_alpha = 256 - alpha; + *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); + } while (loop.next()); + } +}; + +class RtShaded4colsRGBACommand : public DrawerRt4colsCommand +{ + uint32_t _color; + +public: + RtShaded4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + _color = LightBgra::shade_pal_index(dc_color, _light, _shade_constants); + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t alpha = _colormap[loop.source[i]] * 4; + uint32_t inv_alpha = 256 - alpha; + loop.dest[i] = BlendBgra::add(_color, loop.dest[i], alpha, inv_alpha); + } + } while (loop.next()); + } +}; + +class RtAddClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtAddClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtAddClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + 
RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); + } +}; + +class RtRevSubClamp1colRGBACommand : public DrawerRt1colCommand +{ +public: + RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) : DrawerRt1colCommand(hx, sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + uint32_t fg = LightBgra::shade_pal_index(*loop.source, _light, _shade_constants); + *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); + } while (loop.next()); + } +}; + +class RtRevSubClamp4colsRGBACommand : public DrawerRt4colsCommand +{ +public: + RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) : DrawerRt4colsCommand(sx, yl, yh) + { + } + + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + do + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_pal_index(loop.source[i], _light, _shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, _destalpha); + } + } while (loop.next()); } }; @@ -405,745 +611,6 @@ public: } }; -class 
RtAdd1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - BYTE * RESTRICT _colormap; - -public: - RtAdd1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(colormap[*source], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - - source += sincr; - dest += pitch; - } while (--count); - } -}; 
- -class RtAdd4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAdd4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _colormap = dc_colormap; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - BYTE *colormap = _colormap; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(colormap[source[i]], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while 
(--count); - } -}; - -class RtShaded1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - BYTE * RESTRICT _destorg; - int _pitch; - int _color; - fixed_t _light; - -public: - RtShaded1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _destorg = dc_destorg; - _pitch = dc_pitch; - _color = dc_color; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - uint32_t alpha = colormap[*source]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtShaded4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - lighttable_t * RESTRICT _colormap; - int _color; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - -public: - RtShaded4colsRGBACommand(int sx, int yl, int yh) - { - 
this->sx = sx; - this->yl = yl; - this->yh = yh; - - _colormap = dc_colormap; - _color = dc_color; - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - } - - void Execute(DrawerThread *thread) override - { - BYTE *colormap; - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - colormap = _colormap; - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t fg = LightBgra::shade_pal_index_simple(_color, LightBgra::calc_light_multiplier(_light)); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - do { - for (int i = 0; i < 4; i++) - { - uint32_t alpha = colormap[source[i]]; - uint32_t inv_alpha = 64 - alpha; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 64; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 64; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 64; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - -public: - RtAddClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - } 
- - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtAddClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtAddClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int 
pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((fg_red * fg_alpha + bg_red * bg_alpha) / 256, 0, 255); - uint32_t green = clamp((fg_green * fg_alpha + bg_green * bg_alpha) / 256, 0, 255); - uint32_t blue = clamp((fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 0, 255); - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = 
thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = thread->count_for_thread(yl, yh - yl + 
1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 - fg_red * fg_alpha + bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 - fg_green * fg_alpha + bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 - fg_blue * fg_alpha + bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp1colRGBACommand : public DrawerCommand -{ - int hx; - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp1colRGBACommand(int hx, int sx, int yl, int yh) - { - this->hx = hx; - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - 
count = thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4 + hx] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - uint32_t fg = LightBgra::shade_pal_index(*source, light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - source += sincr; - dest += pitch; - } while (--count); - } -}; - -class RtRevSubClamp4colsRGBACommand : public DrawerCommand -{ - int sx; - int yl; - int yh; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; - fixed_t _srcalpha; - fixed_t _destalpha; - ShadeConstants _shade_constants; - -public: - RtRevSubClamp4colsRGBACommand(int sx, int yl, int yh) - { - this->sx = sx; - this->yl = yl; - this->yh = yh; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _light = dc_light; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _shade_constants = dc_shade_constants; - } - - void Execute(DrawerThread *thread) override - { - uint32_t *source; - uint32_t *dest; - int count; - int pitch; - int sincr; - - count = 
thread->count_for_thread(yl, yh - yl + 1); - if (count <= 0) - return; - - dest = thread->dest_for_thread(yl, _pitch, ylookup[yl] + sx + (uint32_t*)_destorg); - source = &thread->dc_temp_rgba[yl * 4] + thread->skipped_by_thread(yl) * 4; - pitch = _pitch * thread->num_cores; - sincr = 4 * thread->num_cores; - - uint32_t light = LightBgra::calc_light_multiplier(_light); - ShadeConstants shade_constants = _shade_constants; - - uint32_t fg_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t bg_alpha = _destalpha >> (FRACBITS - 8); - - do { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_pal_index(source[i], light, shade_constants); - uint32_t fg_red = (fg >> 16) & 0xff; - uint32_t fg_green = (fg >> 8) & 0xff; - uint32_t fg_blue = fg & 0xff; - - uint32_t bg_red = (dest[i] >> 16) & 0xff; - uint32_t bg_green = (dest[i] >> 8) & 0xff; - uint32_t bg_blue = (dest[i]) & 0xff; - - uint32_t red = clamp((0x10000 + fg_red * fg_alpha - bg_red * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t green = clamp((0x10000 + fg_green * fg_alpha - bg_green * bg_alpha) / 256, 256, 256 + 255) - 256; - uint32_t blue = clamp((0x10000 + fg_blue * fg_alpha - bg_blue * bg_alpha) / 256, 256, 256 + 255) - 256; - - dest[i] = 0xff000000 | (red << 16) | (green << 8) | blue; - } - - source += sincr; - dest += pitch; - } while (--count); - } -}; - class RtInitColsRGBACommand : public DrawerCommand { BYTE * RESTRICT buff; From 8ec420a597ee40f52aa0de394d782784a02c6cb9 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 24 Jun 2016 19:05:04 +0200 Subject: [PATCH 076/100] Added support for more texture filtering control --- src/r_draw.h | 6 +++++- src/r_draw_rgba.cpp | 15 ++++++++------- src/r_draw_rgba.h | 9 +++------ src/r_draw_rgba_sse.h | 6 +++--- src/r_segs.cpp | 33 ++++++++++++++++++++++----------- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/src/r_draw.h b/src/r_draw.h index bd477efc4..6a078b08f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -379,6 +379,10 @@ 
void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; -EXTERN_CVAR(Bool, r_bilinear); + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter_linear); +EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index dc97fdd47..5a6e88e3b 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,9 +60,10 @@ extern float rw_light; extern float rw_lightstep; extern int wallshade; -CVAR(Bool, r_multithreaded, true, 0) -CVAR(Bool, r_bilinear, true, 0) -CVAR(Bool, r_mipmap, true, 0) +CVAR(Bool, r_multithreaded, true, 0); +CVAR(Bool, r_magfilter_linear, false, 0); +CVAR(Bool, r_minfilter_linear, false, 0); +CVAR(Bool, r_mipmap, true, 0); #ifndef NO_SSE @@ -904,7 +905,7 @@ public: const uint32_t * RESTRICT _source; uint32_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; uint32_t _srcalpha; uint32_t _destalpha; @@ -925,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -995,7 +996,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { @@ -1040,7 +1041,7 @@ public: LoopIterator loop(this, thread); if (!loop) return; - if (_magnifying) + if (_nearest_filter) { if (loop.is_64x64) { diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 20fff4fc0..56f1faa24 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -461,15 +461,11 @@ class SampleBgra public: inline static bool span_sampler_setup(const 
uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) { - if (!r_bilinear) - return false; - // Is this a magfilter or minfilter? fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); fixed_t ymagnitude = abs(ystep) >> (32 - ybits - FRACBITS); fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); - if (magnitude >> FRACBITS == 0) - return false; + bool magnifying = (magnitude >> FRACBITS == 0); if (r_mipmap) { @@ -485,7 +481,8 @@ public: level >>= 1; } } - return true; + + return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); } FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index 408a2f5a2..bca30185c 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -25,7 +25,7 @@ class VecCommand(DrawSpanRGBA) : public DrawerCommand BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - bool _magnifying; + bool _nearest_filter; public: VecCommand(DrawSpanRGBA)() @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _magnifying = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); } void Execute(DrawerThread *thread) override @@ -73,7 +73,7 @@ public: uint32_t light = LightBgra::calc_light_multiplier(_light); ShadeConstants shade_constants = _shade_constants; - if (_magnifying) + if (_nearest_filter) { if (_xbits == 6 && _ybits == 6) { diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 95dd287aa..5c9037375 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -50,6 +50,7 @@ #include "r_plane.h" #include "r_segs.h" #include "r_3dfloors.h" +#include "r_draw.h" #include "v_palette.h" #include "r_data/colormaps.h" @@ -58,8 +59,6 @@ 
CVAR(Bool, r_np2, true, 0) -EXTERN_CVAR(Bool, r_bilinear) - //CVAR (Int, ty, 8, 0) //CVAR (Int, tx, 8, 0) @@ -1104,8 +1103,7 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - // Only do bilinear filtering if enabled and not a magnifying filter - if (!r_swtruecolor || !r_bilinear || magnifying || getcol != R_GetColumn) + if (!r_swtruecolor || getcol != R_GetColumn) { source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; @@ -1138,13 +1136,26 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = (BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } From 77054639666f967c7f885e63205ed7978203d3f0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 10:33:35 +0200 Subject: [PATCH 077/100] Improved linear filtering of walls Fixed some crash bugs Added mipmap and filtering options to the display menu --- src/r_draw.cpp | 16 +- src/r_draw.h | 13 +- src/r_draw_rgba.cpp | 120 ++++----------- src/r_draw_rgba.h | 35 ++--- 
src/r_draw_rgba_sse.h | 194 +++++++++++++----------- src/r_segs.cpp | 292 +++++++++++++++++++++++-------------- src/textures/textures.h | 4 + wadsrc/static/language.enu | 3 + wadsrc/static/menudef.txt | 3 + 9 files changed, 364 insertions(+), 316 deletions(-) diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 578ca9646..682ed4668 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -154,6 +154,7 @@ int dc_yl; int dc_yh; fixed_t dc_iscale; fixed_t dc_texturefrac; +uint32_t dc_textureheight; int dc_color; // [RH] Color for column filler DWORD dc_srccolor; uint32_t dc_srccolor_bgra; @@ -177,6 +178,7 @@ fixed_t palookuplight[4]; const BYTE* bufplce[4]; const BYTE* bufplce2[4]; uint32_t buftexturefracx[4]; +uint32_t bufheight[4]; // just for profiling int dccount; @@ -1044,6 +1046,7 @@ int ds_ybits; // start of a floor/ceiling tile image const BYTE* ds_source; +bool ds_source_mipmapped; // just for profiling int dscount; @@ -1067,6 +1070,7 @@ extern "C" BYTE *ds_curcolormap, *ds_cursource, *ds_curtiltedsource; void R_SetSpanSource(FTexture *tex) { ds_source = r_swtruecolor ? 
(const BYTE*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped(); #ifdef X86_ASM if (!r_swtruecolor && ds_cursource != ds_source) { @@ -1644,8 +1648,6 @@ extern "C" void R_DrawSlabC(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *v int vlinebits; int mvlinebits; -uint32_t vlinemax; -uint32_t mvlinemax; #ifndef X86_ASM static DWORD vlinec1 (); @@ -1695,12 +1697,11 @@ DWORD (*domvline1)() = mvlineasm1; void (*domvline4)() = mvlineasm4; #endif -void setupvline (int fracbits, int fracmax) +void setupvline (int fracbits) { if (r_swtruecolor) { vlinebits = fracbits; - vlinemax = fracmax; return; } @@ -1780,7 +1781,7 @@ void vlinec4 () } #endif -void setupmvline (int fracbits, int fracmax) +void setupmvline (int fracbits) { if (!r_swtruecolor) { @@ -1795,7 +1796,6 @@ void setupmvline (int fracbits, int fracmax) else { mvlinebits = fracbits; - mvlinemax = fracmax; } } @@ -1968,12 +1968,10 @@ void R_DrawFogBoundary_C (int x1, int x2, short *uclip, short *dclip) } int tmvlinebits; -uint32_t tmvlinemax; -void setuptmvline (int bits, int fracmax) +void setuptmvline (int bits) { tmvlinebits = bits; - tmvlinemax = fracmax; } fixed_t tmvline1_add_C () diff --git a/src/r_draw.h b/src/r_draw.h index 6a078b08f..591ae0b5f 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -61,6 +61,7 @@ extern "C" int dc_yh; extern "C" fixed_t dc_iscale; extern double dc_texturemid; extern "C" fixed_t dc_texturefrac; +extern "C" uint32_t dc_textureheight; extern "C" int dc_color; // [RH] For flat colors (no texturing) extern "C" DWORD dc_srccolor; extern "C" uint32_t dc_srccolor_bgra; @@ -84,6 +85,7 @@ extern "C" fixed_t palookuplight[4]; extern "C" const BYTE* bufplce[4]; extern "C" const BYTE* bufplce2[4]; extern "C" uint32_t buftexturefracx[4]; +extern "C" uint32_t bufheight[4]; // [RH] Temporary buffer for column drawing extern "C" BYTE *dc_temp; @@ -100,13 +102,13 @@ extern void (*R_DrawColumn)(void); extern DWORD (*dovline1) (); extern DWORD (*doprevline1) (); 
extern void (*dovline4) (); -extern void setupvline (int,int); +extern void setupvline (int); extern DWORD (*domvline1) (); extern void (*domvline4) (); -extern void setupmvline (int,int); +extern void setupmvline (int); -extern void setuptmvline (int,int); +extern void setuptmvline (int); // The Spectre/Invisibility effect. extern void (*R_DrawFuzzColumn)(void); @@ -316,6 +318,7 @@ extern "C" fixed_t ds_alpha; // start of a 64*64 tile image extern "C" const BYTE* ds_source; +extern "C" bool ds_source_mipmapped; extern "C" int ds_color; // [RH] For flat color (no texturing) @@ -381,8 +384,8 @@ void R_SetTranslationMap(lighttable_t *translation); extern bool r_swtruecolor; EXTERN_CVAR(Bool, r_multithreaded); -EXTERN_CVAR(Bool, r_magfilter_linear); -EXTERN_CVAR(Bool, r_minfilter_linear); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); EXTERN_CVAR(Bool, r_mipmap); #endif diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 5a6e88e3b..2576cfeda 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -48,22 +48,22 @@ #endif #include -extern int vlinebits; -extern int mvlinebits; -extern int tmvlinebits; -extern uint32_t vlinemax; -extern uint32_t mvlinemax; -extern uint32_t tmvlinemax; - extern "C" short spanend[MAXHEIGHT]; extern float rw_light; extern float rw_lightstep; extern int wallshade; +// Use multiple threads when drawing CVAR(Bool, r_multithreaded, true, 0); -CVAR(Bool, r_magfilter_linear, false, 0); -CVAR(Bool, r_minfilter_linear, false, 0); -CVAR(Bool, r_mipmap, true, 0); + +// Use linear filtering when scaling up +CVAR(Bool, r_magfilter, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use linear filtering when scaling down +CVAR(Bool, r_minfilter, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + +// Use mipmapped textures +CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE @@ -926,7 +926,7 @@ public: _source = (const uint32_t*)ds_source; _light = LightBgra::calc_light_multiplier(ds_light); 
_shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); _srcalpha = dc_srcalpha >> (FRACBITS - 8); _destalpha = dc_destalpha >> (FRACBITS - 8); @@ -1354,8 +1354,7 @@ public: DWORD _texturefrac; uint32_t _texturefracx; DWORD _iscale; - int _vlinebits; - uint32_t _vlinemax; + uint32_t _textureheight; const uint32 * RESTRICT _source; const uint32 * RESTRICT _source2; @@ -1365,7 +1364,7 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall1Command(int vlinebits, uint32_t vlinemax) + DrawerWall1Command() { _dest = dc_dest; _pitch = dc_pitch; @@ -1373,8 +1372,7 @@ public: _texturefrac = dc_texturefrac; _texturefracx = dc_texturefracx; _iscale = dc_iscale; - _vlinebits = vlinebits; - _vlinemax = vlinemax; + _textureheight = dc_textureheight; _source = (const uint32 *)dc_source; _source2 = (const uint32 *)dc_source2; @@ -1394,7 +1392,8 @@ public: uint32_t fracstep; uint32_t frac; uint32_t texturefracx; - int bits; + uint32_t height; + uint32_t half; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1406,8 +1405,10 @@ public: frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); texturefracx = command->_texturefracx; dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - bits = command->_vlinebits; pitch = command->_pitch * thread->num_cores; + + height = command->_textureheight; + half = (0x80000000 + height - 1) / height; } explicit operator bool() @@ -1417,7 +1418,7 @@ public: int sample_index() { - return frac >> bits; + return ((frac >> FRACBITS) * height) >> FRACBITS; } bool next() @@ -1435,12 +1436,11 @@ public: BYTE * RESTRICT _dest; int _count; int _pitch; - int _vlinebits; - uint32_t _vlinemax; ShadeConstants _shade_constants; uint32_t _vplce[4]; uint32_t 
_vince[4]; uint32_t _buftexturefracx[4]; + uint32_t _bufheight[4]; const uint32_t * RESTRICT _bufplce[4]; const uint32_t * RESTRICT _bufplce2[4]; uint32_t _light[4]; @@ -1448,19 +1448,18 @@ public: uint32_t _srcalpha; uint32_t _destalpha; - DrawerWall4Command(int vlinebits, uint32_t vlinemax) + DrawerWall4Command() { _dest = dc_dest; _count = dc_count; _pitch = dc_pitch; - _vlinebits = vlinebits; - _vlinemax = vlinemax; _shade_constants = dc_shade_constants; for (int i = 0; i < 4; i++) { _vplce[i] = vplce[i]; _vince[i] = vince[i]; _buftexturefracx[i] = buftexturefracx[i]; + _bufheight[i] = bufheight[i]; _bufplce[i] = (const uint32_t *)bufplce[i]; _bufplce2[i] = (const uint32_t *)bufplce2[i]; _light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); @@ -1475,9 +1474,10 @@ public: uint32_t *dest; int pitch; int count; - int bits; uint32_t vplce[4]; uint32_t vince[4]; + uint32_t height[4]; + uint32_t half[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1487,13 +1487,14 @@ public: dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); pitch = command->_pitch * thread->num_cores; - bits = command->_vlinebits; int skipped = thread->skipped_by_thread(command->_dest_y); for (int i = 0; i < 4; i++) { vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; + height[i] = command->_bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; } } @@ -1504,7 +1505,7 @@ public: int sample_index(int col) { - return vplce[col] >> bits; + return ((vplce[col] >> FRACBITS) * height[col]) >> FRACBITS; } bool next() @@ -1522,10 +1523,6 @@ public: class Vlinec1RGBACommand : public DrawerWall1Command { public: - Vlinec1RGBACommand() : DrawerWall1Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1543,7 +1540,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -1553,10 +1550,6 @@ public: class Vlinec4RGBACommand : public DrawerWall4Command { public: - Vlinec4RGBACommand() : DrawerWall4Command(vlinebits, vlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1579,7 +1572,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::copy(fg); } } while (loop.next()); @@ -1590,10 +1583,6 @@ public: class Mvlinec1RGBACommand : public DrawerWall1Command { public: - Mvlinec1RGBACommand() : DrawerWall1Command(mvlinebits, mvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1611,7 +1600,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.bits, _vlinemax), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } @@ -1621,10 +1610,6 @@ public: class Mvlinec4RGBACommand : public DrawerWall4Command { public: - Mvlinec4RGBACommand(): DrawerWall4Command(mvlinebits, mvlinemax) - { - 
} - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1647,7 +1632,7 @@ public: { for (int i = 0; i < 4; i++) { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.bits, _vlinemax), _light[i], _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); } } while (loop.next()); @@ -1658,10 +1643,6 @@ public: class Tmvline1AddRGBACommand : public DrawerWall1Command { public: - Tmvline1AddRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1677,10 +1658,6 @@ public: class Tmvline4AddRGBACommand : public DrawerWall4Command { public: - Tmvline4AddRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1699,10 +1676,6 @@ public: class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: - Tmvline1AddClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1718,10 +1691,6 @@ public: class Tmvline4AddClampRGBACommand : public DrawerWall4Command { public: - Tmvline4AddClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1740,10 +1709,6 @@ public: class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1SubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1759,10 +1724,6 @@ public: class 
Tmvline4SubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4SubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1781,10 +1742,6 @@ public: class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: - Tmvline1RevSubClampRGBACommand() : DrawerWall1Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -1800,10 +1757,6 @@ public: class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command { public: - Tmvline4RevSubClampRGBACommand() : DrawerWall4Command(tmvlinebits, tmvlinemax) - { - } - void Execute(DrawerThread *thread) override { LoopIterator loop(this, thread); @@ -2362,17 +2315,8 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); } -//extern FTexture *rw_pic; // For the asserts below - DWORD vlinec1_rgba() { - /*DWORD fracstep = dc_iscale; - DWORD frac = dc_texturefrac; - DWORD height = rw_pic->GetHeight(); - assert((frac >> vlinebits) < height); - frac += (dc_count-1) * fracstep; - assert((frac >> vlinebits) <= height);*/ - DrawerCommandQueue::QueueCommand(); return dc_texturefrac + dc_count * dc_iscale; } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 56f1faa24..a60fd65c7 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -459,7 +459,7 @@ public: class SampleBgra { public: - inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep) + inline static bool span_sampler_setup(const uint32_t * RESTRICT &source, int &xbits, int &ybits, fixed_t xstep, fixed_t ystep, bool mipmapped) { // Is this a magfilter or minfilter? 
fixed_t xmagnitude = abs(xstep) >> (32 - xbits - FRACBITS); @@ -467,7 +467,7 @@ public: fixed_t magnitude = (xmagnitude + ymagnitude) * 2 + (1 << (FRACBITS - 1)); bool magnifying = (magnitude >> FRACBITS == 0); - if (r_mipmap) + if (r_mipmap && mipmapped) { int level = magnitude >> (FRACBITS + 1); while (level != 0) @@ -482,18 +482,15 @@ public: } } - return (magnifying && r_magfilter_linear) || (!magnifying && r_minfilter_linear); + return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, int ybits, uint32_t ymax) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) { - uint32_t half = 1 << (ybits - 1); - uint32_t y0 = (texturefracy - half) >> ybits; - if (y0 > ymax) - y0 = 0; - uint32_t y1 = y0 + 1; - if (y1 > ymax) - y1 = 0; + uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t y0 = frac_y0 >> FRACBITS; + uint32_t y1 = frac_y1 >> FRACBITS; uint32_t p00 = col0[y0]; uint32_t p01 = col0[y1]; @@ -501,7 +498,7 @@ public: uint32_t p11 = col1[y1]; uint32_t inv_b = texturefracx; - uint32_t inv_a = ((texturefracy + half) >> (ybits - 4)) & 15; + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -546,20 +543,18 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, ybits, ymax) { \ - uint32_t half = 1 << (ybits - 1); \ - \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t y0 
= (texturefracy[i] - half) >> ybits; \ - if (y0 > ymax) y0 = 0; \ - uint32_t y1 = y0 + 1; \ - if (y1 > ymax) y1 = 0; \ + uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t y0 = frac_y0 >> FRACBITS; \ + uint32_t y1 = frac_y1 >> FRACBITS; \ \ uint32_t inv_b = texturefracx[i]; \ - uint32_t inv_a = ((texturefracy[i] + half) >> (ybits - 4)) & 15; \ + uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ uint32_t a = 16 - inv_a; \ uint32_t b = 16 - inv_b; \ \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index bca30185c..ae8d3bf42 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -43,7 +43,7 @@ public: _destorg = dc_destorg; _light = ds_light; _shade_constants = ds_shade_constants; - _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep); + _nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); } void Execute(DrawerThread *thread) override @@ -364,14 +364,13 @@ class VecCommand(Vlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _vlinebits; - uint32_t _vlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; const uint32_t * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Vlinec4RGBA)() @@ -380,8 +379,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _vlinebits = vlinebits; - _vlinemax = vlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -390,6 +387,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -400,9 +398,16 @@ public: return; uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, 
(uint32_t*)_dest); - int bits = _vlinebits; int pitch = _pitch * thread->num_cores; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } + uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); @@ -431,10 +436,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -457,10 +462,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t p0 = bufplce[0][place0 >> bits]; - uint32_t p1 = bufplce[1][place1 >> bits]; - uint32_t p2 = bufplce[2][place2 >> bits]; - uint32_t p3 = bufplce[3][place3 >> bits]; + uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -482,7 +487,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, 
half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -500,7 +505,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _vlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -522,7 +527,6 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand int _count; int _pitch; ShadeConstants _shade_constants; - int _mvlinebits; uint32_t _mvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; @@ -530,6 +534,7 @@ class VecCommand(Mvlinec4RGBA) : public DrawerCommand const uint32 * RESTRICT bufplce[4]; const uint32 * RESTRICT bufplce2[4]; uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; public: VecCommand(Mvlinec4RGBA)() @@ -538,8 +543,6 @@ public: _count = dc_count; _pitch = dc_pitch; _shade_constants = dc_shade_constants; - _mvlinebits = mvlinebits; - _mvlinemax = mvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; @@ -548,6 +551,7 @@ public: bufplce[i] = (const uint32 *)::bufplce[i]; bufplce2[i] = (const uint32_t *)::bufplce2[i]; buftexturefracx[i] = ::buftexturefracx[i]; + bufheight[i] = ::bufheight[i]; } } @@ -559,7 +563,13 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _mvlinebits; + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); @@ -589,10 +599,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - 
uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -617,10 +627,10 @@ public: DWORD place2 = local_vplce[2]; DWORD place3 = local_vplce[3]; - uint32_t pix0 = bufplce[0][place0 >> bits]; - uint32_t pix1 = bufplce[1][place1 >> bits]; - uint32_t pix2 = bufplce[2][place2 >> bits]; - uint32_t pix3 = bufplce[3][place3 >> bits]; + uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = place0 + local_vince[0]; local_vplce[1] = place1 + local_vince[1]; @@ -644,7 +654,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -664,7 +674,7 @@ public: do { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, bits, _mvlinemax); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -690,12 +700,11 @@ class VecCommand(Tmvline4AddRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; 
- int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 * RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddRGBA)() @@ -706,14 +715,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -725,7 +733,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -754,10 +769,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -791,10 +806,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) 
* height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -831,12 +846,11 @@ class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4AddClampRGBA)() @@ -847,14 +861,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -866,7 +879,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -895,10 +915,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = 
bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -931,10 +951,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -971,12 +991,11 @@ class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4SubClampRGBA)() @@ -987,14 +1006,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; - _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1006,7 +1024,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + 
height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1035,10 +1060,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1071,10 +1096,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1111,12 +1136,11 @@ class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand ShadeConstants _shade_constants; fixed_t _srcalpha; fixed_t _destalpha; - int _tmvlinebits; - uint32_t _tmvlinemax; fixed_t palookuplight[4]; DWORD vplce[4]; DWORD vince[4]; const uint32 *RESTRICT bufplce[4]; + uint32_t bufheight[4]; public: VecCommand(Tmvline4RevSubClampRGBA)() @@ -1127,14 +1151,13 @@ public: _shade_constants = dc_shade_constants; _srcalpha = dc_srcalpha; _destalpha = dc_destalpha; 
- _tmvlinebits = tmvlinebits; - _tmvlinemax = tmvlinemax; for (int i = 0; i < 4; i++) { palookuplight[i] = ::palookuplight[i]; vplce[i] = ::vplce[i]; vince[i] = ::vince[i]; bufplce[i] = (const uint32 *)::bufplce[i]; + bufheight[i] = ::bufheight[i]; } } @@ -1146,7 +1169,14 @@ public: uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); int pitch = _pitch * thread->num_cores; - int bits = _tmvlinebits; + + uint32_t height[4]; + uint32_t half[4]; + for (int i = 0; i < 4; i++) + { + height[i] = bufheight[i]; + half[i] = (0x80000000 + height[i] - 1) / height[i]; + } uint32_t light[4]; light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); @@ -1175,10 +1205,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0] + local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; @@ -1211,10 +1241,10 @@ public: do { - uint32_t pix0 = bufplce[0][local_vplce[0] >> bits]; - uint32_t pix1 = bufplce[1][local_vplce[1] >> bits]; - uint32_t pix2 = bufplce[2][local_vplce[2] >> bits]; - uint32_t pix3 = bufplce[3][local_vplce[3] >> bits]; + uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; + uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; + uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; + uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; local_vplce[0] = local_vplce[0]
+ local_vince[0]; local_vplce[1] = local_vplce[1] + local_vince[1]; diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 5c9037375..630d64da0 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1075,86 +1075,112 @@ struct WallscanSampler uint32_t uv_pos; uint32_t uv_step; - int32_t uv_fracbits; uint32_t uv_max; const BYTE *source; const BYTE *source2; uint32_t texturefracx; + uint32_t height; }; WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xoffset, FTexture *texture, const BYTE*(*getcol)(FTexture *texture, int x)) { - int base_width = texture->GetWidth(); - int base_height = texture->GetHeight(); - uv_fracbits = 32 - texture->HeightBits; - uv_max = base_height << uv_fracbits; - - // Find start uv in [0-base_height[ range. - // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. - double uv_stepd = swal * yrepeat; - double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / base_height; - v = v - floor(v); - v *= base_height; - v *= (1 << uv_fracbits); - - uv_pos = (uint32_t)v; - uv_step = xs_ToFixed(uv_fracbits, uv_stepd); - - bool magnifying = uv_step >> (uv_fracbits - 1) == 0; - - if (!r_swtruecolor || getcol != R_GetColumn) + if (!r_swtruecolor) { + height = texture->GetHeight(); + int uv_fracbits = 32 - texture->HeightBits; + uv_max = height << uv_fracbits; + + // Find start uv in [0-base_height[ range. + // Not using xs_ToFixed because it rounds the result and we need something that always rounds down to stay within the range. 
+ double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / height; + v = v - floor(v); + v *= height; + v *= (1 << uv_fracbits); + + uv_pos = (uint32_t)v; + uv_step = xs_ToFixed(uv_fracbits, uv_stepd); + if (uv_step == 0) // To prevent divide by zero elsewhere + uv_step = 1; + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; texturefracx = 0; } else { - int mipmap_offset = 0; - int mip_width = base_width; - int mip_height = base_height; - if (r_mipmap) - { - fixed_t magnitude = abs((int32_t)uv_step) >> (uv_fracbits - FRACBITS); - int level = magnitude >> FRACBITS; - while (level != 0) - { - if (uv_fracbits > 30) - break; + // Normalize to 0-1 range: + double uv_stepd = swal * yrepeat; + double v = (dc_texturemid + uv_stepd * (y1 - CenterY + 0.5)) / texture->GetHeight(); + v = v - floor(v); + double v_step = uv_stepd / texture->GetHeight(); - mipmap_offset += mip_width * mip_height; - uv_fracbits += 1; - uv_pos >>= 1; - uv_step >>= 1; - xoffset >>= 1; - level >>= 1; - mip_width = MAX(mip_width >> 1, 1); - mip_height = MAX(mip_height >> 1, 1); - } + if (isnan(v) || isnan(v_step)) // this should never happen, but it apparently does.. 
+ { + uv_stepd = 0.0; + v = 0.0; + v_step = 0.0; } - const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + // Convert to uint32: + uv_pos = (uint32_t)(v * 0x100000000LL); + uv_step = (uint32_t)(v_step * 0x100000000LL); + uv_max = 0; - bool filter_nearest = (magnifying && !r_magfilter_linear) || (!magnifying && !r_minfilter_linear); - if (filter_nearest) + // Texture mipmap and filter selection: + if (getcol != R_GetColumn) { - int tx = (xoffset >> FRACBITS) % mip_width; - if (tx < 0) - tx += mip_width; - source = (BYTE*)(pixels + tx * mip_height); + source = getcol(texture, xoffset >> FRACBITS); source2 = nullptr; + height = texture->GetHeight(); texturefracx = 0; } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; - if (tx0 < 0) - tx0 += mip_width; - int tx1 = (tx0 + 1) % mip_width; - source = (BYTE*)(pixels + tx0 * mip_height); - source2 = (BYTE*)(pixels + tx1 * mip_height); - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + double magnitude = abs(uv_stepd * 2); + bool magnifying = magnitude < 1.0f; + + int mipmap_offset = 0; + int mip_width = texture->GetWidth(); + int mip_height = texture->GetHeight(); + if (r_mipmap && texture->Mipmapped()) + { + int level = (int)MAX(magnitude - 1.0, 0.0); + while (level != 0) + { + mipmap_offset += mip_width * mip_height; + xoffset >>= 1; + level >>= 1; + mip_width = MAX(mip_width >> 1, 1); + mip_height = MAX(mip_height >> 1, 1); + } + } + + const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; + + bool filter_nearest = (magnifying && !r_magfilter) || (!magnifying && !r_minfilter); + if (filter_nearest) + { + int tx = (xoffset >> FRACBITS) % mip_width; + if (tx < 0) + tx += mip_width; + source = (BYTE*)(pixels + tx * mip_height); + source2 = nullptr; + height = mip_height; + texturefracx = 0; + } + else + { + int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + if (tx0 < 0) + tx0 += mip_width; + int tx1 = (tx0 + 1) % mip_width; + source = 
(BYTE*)(pixels + tx0 * mip_height); + source2 = (BYTE*)(pixels + tx1 * mip_height); + height = mip_height; + texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + } } } } @@ -1162,18 +1188,18 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof // Draw a column with support for non-power-of-two ranges void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*draw1column)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler.uv_max == 0) // power of two + if (r_swtruecolor) { int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; dc_texturefrac = sampler.uv_pos; + dc_textureheight = sampler.height; draw1column(); uint64_t step64 = sampler.uv_step; @@ -1182,41 +1208,60 @@ void wallscan_drawcol1(int x, int y1, int y2, WallscanSampler &sampler, DWORD(*d } else { - uint32_t uv_pos = sampler.uv_pos; - - uint32_t left = y2 - y1; - while (left > 0) + if (sampler.uv_max == 0) // power of two { - uint32_t available = sampler.uv_max - uv_pos; - uint32_t next_uv_wrap = available / sampler.uv_step; - if (available % sampler.uv_step != 0) - next_uv_wrap++; - uint32_t count = MIN(left, next_uv_wrap); + int count = y2 - y1; dc_source = sampler.source; dc_source2 = sampler.source2; dc_texturefracx = sampler.texturefracx; - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; dc_iscale = sampler.uv_step; - dc_texturefrac = uv_pos; + dc_texturefrac = sampler.uv_pos; draw1column(); - left -= count; - uv_pos += sampler.uv_step * count; - if (uv_pos >= sampler.uv_max) - uv_pos -= sampler.uv_max; + uint64_t step64 = sampler.uv_step; + uint64_t pos64 = sampler.uv_pos; + sampler.uv_pos = (uint32_t)(pos64 + step64 * count); } + else + { + uint32_t 
uv_pos = sampler.uv_pos; - sampler.uv_pos = uv_pos; + uint32_t left = y2 - y1; + while (left > 0) + { + uint32_t available = sampler.uv_max - uv_pos; + uint32_t next_uv_wrap = available / sampler.uv_step; + if (available % sampler.uv_step != 0) + next_uv_wrap++; + uint32_t count = MIN(left, next_uv_wrap); + + dc_source = sampler.source; + dc_source2 = sampler.source2; + dc_texturefracx = sampler.texturefracx; + dc_dest = (ylookup[y1] + x) + dc_destorg; + dc_count = count; + dc_iscale = sampler.uv_step; + dc_texturefrac = uv_pos; + draw1column(); + + left -= count; + uv_pos += sampler.uv_step * count; + if (uv_pos >= sampler.uv_max) + uv_pos -= sampler.uv_max; + } + + sampler.uv_pos = uv_pos; + } } } // Draw four columns with support for non-power-of-two ranges void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*draw4columns)()) { - int pixelsize = r_swtruecolor ? 4 : 1; - if (sampler[0].uv_max == 0) // power of two, no wrap handling needed + if (r_swtruecolor) { int count = y2 - y1; for (int i = 0; i < 4; i++) @@ -1224,6 +1269,7 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr bufplce[i] = sampler[i].source; bufplce2[i] = sampler[i].source2; buftexturefracx[i] = sampler[i].texturefracx; + bufheight[i] = sampler[i].height; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; @@ -1231,52 +1277,74 @@ void wallscan_drawcol4(int x, int y1, int y2, WallscanSampler *sampler, void(*dr uint64_t pos64 = sampler[i].uv_pos; sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; + dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; dc_count = count; draw4columns(); } else { - dc_dest = (ylookup[y1] + x) * pixelsize + dc_destorg; - for (int i = 0; i < 4; i++) + if (sampler[0].uv_max == 0) // power of two, no wrap handling needed { - bufplce[i] = sampler[i].source; - bufplce2[i] = sampler[i].source2; - buftexturefracx[i] = sampler[i].texturefracx; - } - - 
uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps + int count = y2 - y1; for (int i = 0; i < 4; i++) { + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; vplce[i] = sampler[i].uv_pos; vince[i] = sampler[i].uv_step; + + uint64_t step64 = sampler[i].uv_step; + uint64_t pos64 = sampler[i].uv_pos; + sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); } + dc_dest = (ylookup[y1] + x) + dc_destorg; dc_count = count; draw4columns(); - - // Wrap the uv position + } + else + { + dc_dest = (ylookup[y1] + x) + dc_destorg; for (int i = 0; i < 4; i++) { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; + bufplce[i] = sampler[i].source; + bufplce2[i] = sampler[i].source2; + buftexturefracx[i] = sampler[i].texturefracx; } - left -= count; + uint32_t left = y2 - y1; + while (left > 0) + { + // Find which column wraps first + uint32_t count = left; + for (int i = 0; i < 4; i++) + { + uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; + uint32_t next_uv_wrap = available / sampler[i].uv_step; + if (available % sampler[i].uv_step != 0) + next_uv_wrap++; + count = MIN(next_uv_wrap, count); + } + + // Draw until that column wraps + for (int i = 0; i < 4; i++) + { + vplce[i] = sampler[i].uv_pos; + vince[i] = sampler[i].uv_step; + } + dc_count = count; + draw4columns(); + + // Wrap the uv position + for (int i = 0; i < 4; i++) + { + sampler[i].uv_pos += sampler[i].uv_step * count; + if (sampler[i].uv_pos >= sampler[i].uv_max) + sampler[i].uv_pos -= sampler[i].uv_max; + } + + left 
-= count; + } } } } @@ -1287,7 +1355,7 @@ typedef void(*Draw4ColumnsFuncPtr)(); void wallscan_any( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x), - void(setupwallscan(int bits, int fracmax, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) + void(setupwallscan(int bits, Draw1ColumnFuncPtr &draw1, Draw4ColumnsFuncPtr &draw2))) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -1297,7 +1365,7 @@ void wallscan_any( DWORD(*draw1column)(); void(*draw4columns)(); - setupwallscan(32 - rw_pic->HeightBits, (rw_pic->GetHeight() - 1) << (32 - rw_pic->HeightBits), draw1column, draw4columns); + setupwallscan(r_swtruecolor ? FRACBITS : 32 - rw_pic->HeightBits, draw1column, draw4columns); bool fixed = (fixedcolormap != NULL || fixedlightlev >= 0); if (fixed) @@ -1450,9 +1518,9 @@ void wallscan_any( void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x)) { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupvline(bits, fracmax); + setupvline(bits); line1 = dovline1; line4 = dovline4; }); @@ -1466,9 +1534,9 @@ void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t } else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setupmvline(bits, fracmax); + setupmvline(bits); line1 = domvline1; line4 = domvline4; }); @@ -1486,9 +1554,9 @@ void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fi } 
else { - wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, int fracmax, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) + wallscan_any(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, [](int bits, Draw1ColumnFuncPtr &line1, Draw4ColumnsFuncPtr &line4) { - setuptmvline(bits, fracmax); + setuptmvline(bits); line1 = reinterpret_cast(tmvline1); line4 = tmvline4; }); diff --git a/src/textures/textures.h b/src/textures/textures.h index ab9dc3719..bb83f79e7 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -185,6 +185,9 @@ public: // Returns the whole texture, stored in column-major order, in BGRA8 format virtual const uint32_t *GetPixelsBgra(); + // Returns true if GetPixelsBgra includes mipmaps + virtual bool Mipmapped() { return true; } + virtual int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate=0, FCopyInfo *inf = NULL); int CopyTrueColorTranslated(FBitmap *bmp, int x, int y, int rotate, FRemapTable *remap, FCopyInfo *inf = NULL); virtual bool UseBasePalette(); @@ -530,6 +533,7 @@ public: void SetUpdated() { bNeedsUpdate = false; bDidUpdate = true; bFirstUpdate = false; } DSimpleCanvas *GetCanvas() { return Canvas; } DSimpleCanvas *GetCanvasBgra() { return CanvasBgra; } + bool Mipmapped() override { return false; } void MakeTexture (); void MakeTextureBgra (); diff --git a/wadsrc/static/language.enu b/wadsrc/static/language.enu index f9050a27b..0bb3a84fd 100644 --- a/wadsrc/static/language.enu +++ b/wadsrc/static/language.enu @@ -1781,6 +1781,9 @@ DSPLYMNU_VSYNC = "Vertical Sync"; DSPLYMNU_CAPFPS = "Rendering Interpolation"; DSPLYMNU_COLUMNMETHOD = "Column render mode"; DSPLYMNU_TRUECOLOR = "True color output"; +DSPLYMNU_MINFILTER = "Linear filter when downscaling"; +DSPLYMNU_MAGFILTER = "Linear filter when upscaling"; +DSPLYMNU_MIPMAP = "Use mipmapped textures"; DSPLYMNU_WIPETYPE = "Screen wipe style"; DSPLYMNU_SHOWENDOOM = "Show ENDOOM screen"; DSPLYMNU_PALLETEHACK = "DirectDraw palette hack"; // Not 
used diff --git a/wadsrc/static/menudef.txt b/wadsrc/static/menudef.txt index 3c712de96..679db909b 100644 --- a/wadsrc/static/menudef.txt +++ b/wadsrc/static/menudef.txt @@ -662,6 +662,9 @@ OptionMenu "VideoOptions" Option "$DSPLYMNU_CAPFPS", "cl_capfps", "OffOn" Option "$DSPLYMNU_COLUMNMETHOD", "r_columnmethod", "ColumnMethods" Option "$DSPLYMNU_TRUECOLOR", "swtruecolor", "OnOff" + Option "$DSPLYMNU_MINFILTER", "r_minfilter", "OnOff" + Option "$DSPLYMNU_MAGFILTER", "r_magfilter", "OnOff" + Option "$DSPLYMNU_MIPMAP", "r_mipmap", "OnOff" StaticText " " Option "$DSPLYMNU_WIPETYPE", "wipetype", "Wipes" From 3b6d177787842f0d1844673a4b220797215fa1d8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 25 Jun 2016 12:14:15 +0200 Subject: [PATCH 078/100] Added bicubic interpolation when generating mipmaps --- src/CMakeLists.txt | 1 + src/textures/bicubic_interpolation.cpp | 107 +++++++++++++++++++++++++ src/textures/bicubic_interpolation.h | 50 ++++++++++++ src/textures/texture.cpp | 19 +++++ src/textures/textures.h | 1 + 5 files changed, 178 insertions(+) create mode 100644 src/textures/bicubic_interpolation.cpp create mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c0a30ea0..042da0c8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,6 +1190,7 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp + textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp new file mode 100644 index 000000000..2c8a3049d --- /dev/null +++ b/src/textures/bicubic_interpolation.cpp @@ -0,0 +1,107 @@ + +#include "doomtype.h" +#include "bicubic_interpolation.h" + +void BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) +{ + if 
(dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) + return; + + // Scale factor as a rational number r = n / d + int n = dest_width; + int d = src_width; + + const unsigned char *src_ptr = (const unsigned char *)src_data; + unsigned char *dest_ptr = (unsigned char *)dest_data; + + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); + scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); +} + +void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) +{ + // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation + + int dimension_size = (out_width > out_height) ? out_width : out_height; + L_vector.resize(dimension_size); + + for (int i=0;i<4;i++) + c_vector[i].resize(dimension_size); + h_vector.resize(in_width); + + int larger_out_dimension; + int j, k, l, m, index; + int *L = &L_vector[0]; + float x; + float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; + float *h = &h_vector[0]; + + larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; + + for (k = 0; k < larger_out_dimension; k++) + L[k] = (k * d) / n; + + for (k = 0; k < n; k++) + { + x = (float)((k * d) % n) / (float)n; + c[0][k] = C0(x); + c[1][k] = C1(x); + c[2][k] = C2(x); + c[3][k] = C3(x); + } + for (k = n; k < larger_out_dimension; k++) + for (l = 0; l < 4; l++) + c[l][k] = c[l][k % n]; + + for (k = 0; k < out_height; k++) + { + for (j = 0; j < in_width; j++) + { + h[j] = 0.0f; + for (l = 0; l < 4; l++) + { + index = L[k] + l - 1; + if ((index >= 0) && (index < in_height)) + h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; + } + } + for (m = 0; m < out_width; m++) + { + x = 0.5f; + for (l = 0; l < 4; l++) + { + index = L[m] + l - 1; + if ((index >= 0) && (index < in_width)) + x += h[index] * c[3 - l][m]; + } + if (x <= 0.0f) + g[k*out_pitch+m*4] = 0; + else if (x >= 255) + g[k*out_pitch+m*4] = 255; + else + g[k*out_pitch+m*4] = (unsigned char)x; + } + } +} + +inline float BicubicInterpolation::C0(float t) +{ + return -a * t * t * t + a * t * t; +} + +inline float BicubicInterpolation::C1(float t) +{ + return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; +} + +inline float BicubicInterpolation::C2(float t) +{ + return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; +} + +inline float BicubicInterpolation::C3(float t) +{ + return a * t * t * t - 2.0f * a * t * t + a * t; +} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h new file mode 100644 index 000000000..da547ad83 --- /dev/null +++ b/src/textures/bicubic_interpolation.h @@ -0,0 +1,50 @@ +/* +** Bicubic Image Scaler +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. 
+** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef __BICUBIC_INTERPOLATION_H__ +#define __BICUBIC_INTERPOLATION_H__ + +#pragma once + +#include <vector> + +// Bicubic image scaler +class BicubicInterpolation +{ +public: + void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); + +private: + void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); + + float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 + + inline float C0(float t); + inline float C1(float t); + inline float C2(float t); + inline float C3(float t); + + std::vector<int> L_vector; + std::vector<float> c_vector[4]; + std::vector<float> h_vector; +}; + +#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index f5e4d4aa8..7ff5c9ba2 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,6 +45,7 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" +#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -381,6 +382,24 @@ int FTexture::MipmapLevels() const } void FTexture::GenerateBgraMipmaps() +{ + BicubicInterpolation bicubic; + + uint32_t *src = PixelsBgra.data(); + uint32_t *dest = src + Width *
Height; + int levels = MipmapLevels(); + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + bicubic.ScaleImage(dest, h, w, src, Height, Width); + + dest += w * h; + } +} + +void FTexture::GenerateBgraMipmapsFast() { uint32_t *src = PixelsBgra.data(); uint32_t *dest = src + Width * Height; diff --git a/src/textures/textures.h b/src/textures/textures.h index bb83f79e7..ff1093a49 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -276,6 +276,7 @@ protected: void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); + void GenerateBgraMipmapsFast(); int MipmapLevels() const; public: From 4fd127651d9177a8a32b3a5415654ff741f8c459 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 06:54:32 +0200 Subject: [PATCH 079/100] Fixed fuzz drawer crash --- src/r_draw_rgba.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 2576cfeda..a0f534164 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -826,7 +826,10 @@ public: // Handle the case where we would go out of bounds at the top: if (yl < fuzzstep) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep + pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep + pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -842,7 +845,7 @@ public: return; } - bool lowerbounds = (yl + count * fuzzstep > _fuzzviewheight); + bool lowerbounds = (yl + (count + fuzzstep - 1) * fuzzstep > _fuzzviewheight); if (lowerbounds) count--; @@ -858,7 +861,10 @@ public: count -= cnt; do { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = 
RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; @@ -875,7 +881,10 @@ public: // Handle the case where we would go out of bounds at the bottom if (lowerbounds) { - uint32_t bg = dest[fuzzoffset[fuzz] * fuzzstep - pitch]; + uint32_t *srcdest = dest + fuzzoffset[fuzz] * fuzzstep - pitch; + //assert(static_cast((srcdest - (uint32_t*)dc_destorg) / (_pitch)) < viewheight); + + uint32_t bg = *srcdest; uint32_t red = RPART(bg) * 3 / 4; uint32_t green = GPART(bg) * 3 / 4; From 928e8e0d4374cff2d2f07957d7ca77d153cec128 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 12:53:10 +0200 Subject: [PATCH 080/100] Improved linear filtering performance by adding a lookup table --- src/r_draw_rgba.cpp | 25 ++++++++++++ src/r_draw_rgba.h | 94 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 99 insertions(+), 20 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a0f534164..bfabdfbbb 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -98,12 +98,37 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +__m128i SampleBgra::samplertable[256 * 2]; + DrawerCommandQueue *DrawerCommandQueue::Instance() { static DrawerCommandQueue queue; return &queue; } +DrawerCommandQueue::DrawerCommandQueue() +{ + for (int inv_b = 0; inv_b < 16; inv_b++) + { + for (int inv_a = 0; inv_a < 16; inv_a++) + { + int a = 16 - inv_a; + int b = 16 - inv_b; + + int ab = a * b; + int invab = inv_a * b; + int ainvb = a * inv_b; + int invainvb = inv_a * inv_b; + + __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); + __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); + + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2, ab_invab); + _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); + } + } +} + 
DrawerCommandQueue::~DrawerCommandQueue() { StopThreads(); diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index a60fd65c7..4961fa6dc 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -222,6 +222,7 @@ class DrawerCommandQueue static DrawerCommandQueue *Instance(); + DrawerCommandQueue(); ~DrawerCommandQueue(); public: @@ -538,11 +539,75 @@ public: return (alpha << 24) | (red << 16) | (green << 8) | blue; } + +#ifndef NO_SSE + static __m128i samplertable[256 * 2]; +#endif }; ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ + const uint32_t *baseptr = col0[0]; \ + __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ + __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ + __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i m127 = _mm_set1_epi16(127); \ + __m128i m16 = _mm_set1_epi32(16); \ + __m128i m15 = _mm_set1_epi32(15); \ + __m128i mheight = _mm_loadu_si128((const __m128i*)height); \ + __m128i mtexturefracx = _mm_loadu_si128((const __m128i*)texturefracx); + +#define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ + __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ + __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ + __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ + __m128i y1 = _mm_srli_epi32(frac_y1, FRACBITS); \ + 
__m128i inv_b = mtexturefracx; \ + __m128i inv_a = _mm_and_si128(_mm_srli_epi32(frac_y1, FRACBITS - 4), m15); \ + __m128i a = _mm_sub_epi32(m16, inv_a); \ + __m128i b = _mm_sub_epi32(m16, inv_b); \ + __m128i ab = _mm_mullo_epi16(a, b); \ + __m128i invab = _mm_mullo_epi16(inv_a, b); \ + __m128i ainvb = _mm_mullo_epi16(a, inv_b); \ + __m128i invainvb = _mm_mullo_epi16(inv_a, inv_b); \ + __m128i ab_lo = _mm_shuffle_epi32(ab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ab_hi = _mm_shuffle_epi32(ab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invab_lo = _mm_shuffle_epi32(invab, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invab_hi = _mm_shuffle_epi32(invab, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i ainvb_lo = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i ainvb_hi = _mm_shuffle_epi32(ainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + __m128i invainvb_lo = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(1, 1, 0, 0)); \ + __m128i invainvb_hi = _mm_shuffle_epi32(invainvb, _MM_SHUFFLE(3, 3, 2, 2)); \ + ab_lo = _mm_or_si128(ab_lo, _mm_slli_epi32(ab_lo, 16)); \ + ab_hi = _mm_or_si128(ab_hi, _mm_slli_epi32(ab_hi, 16)); \ + invab_lo = _mm_or_si128(invab_lo, _mm_slli_epi32(invab_lo, 16)); \ + invab_hi = _mm_or_si128(invab_hi, _mm_slli_epi32(invab_hi, 16)); \ + ainvb_lo = _mm_or_si128(ainvb_lo, _mm_slli_epi32(ainvb_lo, 16)); \ + ainvb_hi = _mm_or_si128(ainvb_hi, _mm_slli_epi32(ainvb_hi, 16)); \ + invainvb_lo = _mm_or_si128(invainvb_lo, _mm_slli_epi32(invainvb_lo, 16)); \ + invainvb_hi = _mm_or_si128(invainvb_hi, _mm_slli_epi32(invainvb_hi, 16)); \ + __m128i p00 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets0), 4); \ + __m128i p01 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets0), 4); \ + __m128i p10 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y0, coloffsets1), 4); \ + __m128i p11 = _mm_i32gather_epi32((const int *)baseptr, _mm_add_epi32(y1, coloffsets1), 4); \ + __m128i p00_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p00, _mm_setzero_si128()), 
ab_lo); \ + __m128i p01_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p01, _mm_setzero_si128()), invab_lo); \ + __m128i p10_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p10, _mm_setzero_si128()), ainvb_lo); \ + __m128i p11_lo = _mm_mullo_epi16(_mm_unpacklo_epi8(p11, _mm_setzero_si128()), invainvb_lo); \ + __m128i p00_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p00, _mm_setzero_si128()), ab_hi); \ + __m128i p01_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p01, _mm_setzero_si128()), invab_hi); \ + __m128i p10_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p10, _mm_setzero_si128()), ainvb_hi); \ + __m128i p11_hi = _mm_mullo_epi16(_mm_unpackhi_epi8(p11, _mm_setzero_si128()), invainvb_hi); \ + __m128i fg_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_lo, p01_lo), _mm_adds_epu16(p10_lo, p11_lo)), m127), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_adds_epu16(p00_hi, p01_hi), _mm_adds_epu16(p10_hi, p11_hi)), m127), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ @@ -550,23 +615,18 @@ public: { \ uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ - uint32_t y0 = frac_y0 >> FRACBITS; \ - uint32_t y1 = frac_y1 >> FRACBITS; \ + uint32_t y0 = (frac_y0 >> FRACBITS); \ + uint32_t y1 = (frac_y1 >> FRACBITS); \ \ uint32_t inv_b = texturefracx[i]; \ uint32_t inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col0[i][y1], col0[i][y0]), _mm_setzero_si128()); \ - __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, col1[i][y1], col1[i][y0]), _mm_setzero_si128()); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); /* lanes low->high: col0[y0], col0[y1], col1[y0], col1[y1] */ \ + __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ + __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ __m128i tmp = _mm_adds_epu16(_mm_mullo_epi16(p0, ab_invab), _mm_mullo_epi16(p1, ainvb_invainvb)); \ __m128i color = _mm_srli_epi16(_mm_adds_epu16(_mm_adds_epu16(_mm_srli_si128(tmp, 8), tmp), m127), 8); \ @@ -597,15 +657,9 @@ public: \ uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ - uint32_t a = 16 - inv_a; \ - uint32_t b = 16 - inv_b; \ \ - uint32_t ab = a * b; \ - uint32_t invab = inv_a * b; \ - uint32_t ainvb = a * inv_b; \ - uint32_t invainvb = inv_a * inv_b; \ - __m128i ab_invab = _mm_set_epi16(invab, invab, invab, invab, ab, ab, ab, ab); \ - __m128i ainvb_invainvb = _mm_set_epi16(invainvb, invainvb, invainvb, invainvb, ainvb, ainvb, ainvb, ainvb); \ + __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ + __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ __m128i p0 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p01, p00), _mm_setzero_si128()); \ __m128i p1 = _mm_unpacklo_epi8(_mm_set_epi32(0, 0, p11, p10), _mm_setzero_si128()); \ From 6c037fa24971df781b5581a42cf58651bcb71954 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 26 Jun 2016 21:23:32 +0200 Subject: [PATCH 081/100] Throwing templates at the code redundancy problem in drawers --- src/r_draw_rgba.cpp | 630 ++++++++++++++++++++-------- src/r_draw_rgba.h | 88 ++--
src/r_draw_rgba_sse.h | 923 +---------------------------------------- src/r_drawt_rgba.cpp | 5 + src/r_drawt_rgba_sse.h | 10 + src/r_segs.cpp | 3 +- 6 files changed, 551 insertions(+), 1108 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index bfabdfbbb..fbb2c12c5 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -67,8 +67,13 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE @@ -1552,8 +1557,446 @@ public: return (--count) != 0; } }; + +#ifdef NO_SSE + struct NearestSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return cmd._bufplce[index][loop.sample_index(index)]; + } + }; + struct LinearSampler + { + FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) + { + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + } + }; +#else + struct NearestSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + return _mm_set_epi32(cmd._bufplce[3][loop.sample_index(3)], cmd._bufplce[2][loop.sample_index(2)], cmd._bufplce[1][loop.sample_index(1)], cmd._bufplce[0][loop.sample_index(0)]); + } + }; + + struct LinearSampler + { + FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg; + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + return fg; + } + }; +#endif + +#ifdef NO_SSE + template + struct 
Copy + { + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::copy(fg); + } + } + }; + + template + struct Mask + { + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); + } + } + }; + + template + struct TMaskAdd + { + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::add(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskSub + { + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + template + struct TMaskRevSub + { + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + for (int i = 0; i < 4; i++) + { + uint32_t fg = LightBgra::shade_bgra(Sampler::Sample1(cmd, loop, i), cmd._light[i], cmd._shade_constants); + loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], cmd._srcalpha, calc_blend_bgalpha(fg, cmd._destalpha)); + } + } + }; + + typedef Copy 
CopyNearestSimple; + typedef Copy CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef Mask MaskNearestSimple; + typedef Mask MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAdd TMaskAddNearestSimple; + typedef TMaskAdd TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSub TMaskSubNearestSimple; + typedef TMaskSub TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + typedef TMaskRevSub TMaskRevSubNearestSimple; + typedef TMaskRevSub TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#else + template + struct CopySimple + { + VEC_SHADE_VARS(); + CopySimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE_SIMPLE(fg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct Copy + { + VEC_SHADE_VARS(); + Copy(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + VEC_SHADE(fg, cmd._shade_constants); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct MaskSimple + { + VEC_SHADE_VARS(); + MaskSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE_SIMPLE(fg); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + 
}; + + template + struct Mask + { + VEC_SHADE_VARS(); + Mask(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + VEC_SHADE(fg, cmd._shade_constants); + VEC_ALPHA_BLEND(fg, bg); + _mm_storeu_si128((__m128i*)loop.dest, fg); + } + }; + + template + struct TMaskAddSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAddSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskAdd + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskAdd(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, 
LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskSub + { + VEC_SHADE_VARS(); + 
VEC_CALC_BLEND_ALPHA_VARS(); + TMaskSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSubSimple + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSubSimple(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_SIMPLE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0]); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE_SIMPLE(fg); + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 
8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + template + struct TMaskRevSub + { + VEC_SHADE_VARS(); + VEC_CALC_BLEND_ALPHA_VARS(); + TMaskRevSub(DrawerWall4Command &cmd, LoopIterator &loop) + { + VEC_SHADE_INIT4(cmd._light[3], cmd._light[2], cmd._light[1], cmd._light[0], cmd._shade_constants); + VEC_CALC_BLEND_ALPHA_INIT(cmd._srcalpha, cmd._destalpha); + } + void Blend(DrawerWall4Command &cmd, LoopIterator &loop) + { + __m128i fg = Sampler::Sample4(cmd, loop); + __m128i bg = _mm_loadu_si128((const __m128i*)loop.dest); + + VEC_CALC_BLEND_ALPHA(fg); + VEC_SHADE(fg, cmd._shade_constants); /* full shade, pairing with VEC_SHADE_INIT4 in the constructor */ + + __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); + __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); + + __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); + __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); + __m128i out = _mm_packus_epi16(out_lo, out_hi); + + _mm_storeu_si128((__m128i*)loop.dest, out); + } + }; + + typedef CopySimple CopyNearestSimple; + typedef CopySimple CopyLinearSimple; + typedef Copy CopyNearest; + typedef Copy CopyLinear; + typedef MaskSimple MaskNearestSimple; + typedef MaskSimple MaskLinearSimple; + typedef Mask MaskNearest; + typedef Mask MaskLinear; + typedef TMaskAddSimple TMaskAddNearestSimple; + typedef TMaskAddSimple TMaskAddLinearSimple; + typedef TMaskAdd TMaskAddNearest; + typedef TMaskAdd TMaskAddLinear; + typedef TMaskSubSimple TMaskSubNearestSimple; + typedef TMaskSubSimple TMaskSubLinearSimple; + typedef TMaskSub TMaskSubNearest; + typedef TMaskSub TMaskSubLinear; + 
typedef TMaskRevSubSimple TMaskRevSubNearestSimple; + typedef TMaskRevSubSimple TMaskRevSubLinearSimple; + typedef TMaskRevSub TMaskRevSubNearest; + typedef TMaskRevSub TMaskRevSubLinear; +#endif }; +typedef DrawerBlendCommand Vlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4NearestRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Vlinec4LinearRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4NearestRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearSimpleRGBACommand; +typedef DrawerBlendCommand Mvlinec4LinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4AddClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4SubClampLinearRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampNearestRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearSimpleRGBACommand; +typedef DrawerBlendCommand Tmvline4RevSubClampLinearRGBACommand; + class Vlinec1RGBACommand : public DrawerWall1Command { public: @@ -1581,39 +2024,6 @@ public: } }; -class Vlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if 
(_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::copy(fg); - } - } while (loop.next()); - } - } -}; - class Mvlinec1RGBACommand : public DrawerWall1Command { public: @@ -1641,39 +2051,6 @@ public: } }; -class Mvlinec4RGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - if (_bufplce2[0] == nullptr) - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - else - { - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_bufplce[i], _bufplce2[i], _buftexturefracx[i], loop.sample_index(i), loop.half[i], loop.height[i]), _light[i], _shade_constants); - loop.dest[i] = BlendBgra::alpha_blend(fg, loop.dest[i]); - } - } while (loop.next()); - } - } -}; - class Tmvline1AddRGBACommand : public DrawerWall1Command { public: @@ -1689,24 +2066,6 @@ public: } }; -class Tmvline4AddRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, 
_destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1AddClampRGBACommand : public DrawerWall1Command { public: @@ -1722,24 +2081,6 @@ public: } }; -class Tmvline4AddClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::add(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1SubClampRGBACommand : public DrawerWall1Command { public: @@ -1755,24 +2096,6 @@ public: } }; -class Tmvline4SubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::sub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - class Tmvline1RevSubClampRGBACommand : public DrawerWall1Command { public: @@ -1788,24 +2111,6 @@ public: } }; -class Tmvline4RevSubClampRGBACommand : public DrawerWall4Command -{ -public: - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - for (int i = 0; i < 4; i++) - { - uint32_t fg = LightBgra::shade_bgra(_bufplce[i][loop.sample_index(i)], _light[i], _shade_constants); - loop.dest[i] = BlendBgra::revsub(fg, loop.dest[i], _srcalpha, calc_blend_bgalpha(fg, _destalpha)); - } - } while (loop.next()); - } -}; - ///////////////////////////////////////////////////////////////////////////// class DrawFogBoundaryLineRGBACommand : public DrawerCommand @@ -2355,13 +2660,22 @@ DWORD vlinec1_rgba() return dc_texturefrac + dc_count * 
dc_iscale; } +template +void queue_wallcommand() +{ + if (bufplce2[0] == nullptr && dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else if (bufplce2[0] == nullptr) + DrawerCommandQueue::QueueCommand(); + else if (dc_shade_constants.simple_shade) + DrawerCommandQueue::QueueCommand(); + else + DrawerCommandQueue::QueueCommand(); +} + void vlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2374,11 +2688,7 @@ DWORD mvlinec1_rgba() void mvlinec4_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2391,11 +2701,7 @@ fixed_t tmvline1_add_rgba() void tmvline4_add_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2408,11 +2714,7 @@ fixed_t tmvline1_addclamp_rgba() void tmvline4_addclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2425,11 +2727,7 @@ fixed_t tmvline1_subclamp_rgba() void tmvline4_subclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } @@ -2442,11 +2740,7 @@ fixed_t tmvline1_revsubclamp_rgba() void tmvline4_revsubclamp_rgba() { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(); -#else - DrawerCommandQueue::QueueCommand(); -#endif + queue_wallcommand(); for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } diff --git a/src/r_draw_rgba.h 
b/src/r_draw_rgba.h index 4961fa6dc..53572c88b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -286,6 +286,22 @@ public: void Execute(DrawerThread *thread) override; }; +template +class DrawerBlendCommand : public CommandType +{ +public: + void Execute(DrawerThread *thread) override + { + LoopIterator loop(this, thread); + if (!loop) return; + BlendMode blend(*this, loop); + do + { + blend.Blend(*this, loop); + } while (loop.next()); + } +}; + ///////////////////////////////////////////////////////////////////////////// // Pixel shading inline functions: @@ -624,7 +640,7 @@ public: __m128i ab_invab = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ \ - __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col1[i][y0]); \ + __m128i gather = _mm_set_epi32(col1[i][y1], col1[i][y0], col0[i][y1], col0[i][y0]); \ __m128i p0 = _mm_unpacklo_epi8(gather, _mm_setzero_si128()); \ __m128i p1 = _mm_unpackhi_epi8(gather, _mm_setzero_si128()); \ \ @@ -635,6 +651,26 @@ public: } \ } +#define VEC_SAMPLE_MIP_NEAREST4_COLUMN(fg, col0, col1, mipfrac, texturefracy, height0, height1) { \ + uint32_t y0[4], y1[4]; \ + for (int i = 0; i < 4; i++) \ + { \ + y0[i] = (texturefracy[i] >> FRACBITS) * height0[i]; \ + y1[i] = (texturefracy[i] >> FRACBITS) * height1[i]; \ + } \ + __m128i p0 = _mm_set_epi32(col0[y0[3]], col0[y0[2]], col0[y0[1]], col0[y0[0]]); \ + __m128i p1 = _mm_set_epi32(col1[y1[3]], col1[y1[2]], col1[y1[1]], col1[y1[0]]); \ + __m128i t = _mm_loadu_si128((const __m128i*)mipfrac); \ + __m128i inv_t = _mm_sub_epi32(_mm_set1_epi32(256), t); /* 256 - t from the loaded vector; NOTE(review): t has 32-bit lanes but is multiplied as 16-bit lanes below - confirm layout */ \ + __m128i p0_lo = _mm_unpacklo_epi8(p0, _mm_setzero_si128()); \ + __m128i p0_hi = _mm_unpackhi_epi8(p0, _mm_setzero_si128()); \ + __m128i p1_lo = _mm_unpacklo_epi8(p1, _mm_setzero_si128()); \ + __m128i p1_hi = _mm_unpackhi_epi8(p1, _mm_setzero_si128()); \ + __m128i fg_lo = 
_mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_lo, t), _mm_mullo_epi16(p1_lo, inv_t)), 8); \ + __m128i fg_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(p0_hi, t), _mm_mullo_epi16(p1_hi, inv_t)), 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ +} + #define VEC_SAMPLE_BILINEAR4_SPAN(fg, texture, xfrac, yfrac, xstep, ystep, xbits, ybits) { \ int xshift = (32 - xbits); \ int yshift = (32 - ybits); \ @@ -844,12 +880,14 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) return (dest_alpha * alpha + 256 * inv_alpha + 128) >> 8; } +#define VEC_CALC_BLEND_ALPHA_VARS() __m128i msrc_alpha, mdest_alpha, m256, m255, m128; + #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \ - __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \ - __m128i mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ - __m128i m256 = _mm_set1_epi16(256); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i m128 = _mm_set1_epi16(128); + msrc_alpha = _mm_set1_epi16(src_alpha); \ + mdest_alpha = _mm_set1_epi16(dest_alpha * 255 / 256); \ + m256 = _mm_set1_epi16(256); \ + m255 = _mm_set1_epi16(255); \ + m128 = _mm_set1_epi16(128); // Calculates the final alpha values to be used when combined with the source texture alpha channel #define VEC_CALC_BLEND_ALPHA(fg) \ @@ -866,15 +904,17 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) fg_alpha_lo = msrc_alpha; \ } +#define SSE_SHADE_VARS() __m128i mlight_hi, mlight_lo, color, fade, fade_amount_hi, fade_amount_lo, inv_desaturate; + // Calculate constants for a simple shade #define SSE_SHADE_SIMPLE_INIT(light) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; // Calculate constants for a simple shade with different light levels for each pixel #define SSE_SHADE_SIMPLE_INIT4(light3, light2, light1, light0) \ - __m128i 
mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ - __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); // Simple shade 4 pixels #define SSE_SHADE_SIMPLE(fg) { \ @@ -889,31 +929,31 @@ FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha) // Calculate constants for a complex shade #define SSE_SHADE_INIT(light, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ - __m128i mlight_lo = mlight_hi; \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ + mlight_lo = mlight_hi; \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = fade_amount_hi; \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = fade_amount_hi; \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Calculate constants for a complex shade with different light levels for each pixel #define SSE_SHADE_INIT4(light3, light2, light1, light0, shade_constants) \ - __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ 
- __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ - __m128i color = _mm_set_epi16( \ + mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ + mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ + color = _mm_set_epi16( \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ - __m128i fade = _mm_set_epi16( \ + fade = _mm_set_epi16( \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ - __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ - __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ - __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ + fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ + fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ + inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ // Complex shade 4 pixels #define SSE_SHADE(fg, shade_constants) { \ diff --git a/src/r_draw_rgba_sse.h b/src/r_draw_rgba_sse.h index ae8d3bf42..4ee557693 100644 --- a/src/r_draw_rgba_sse.h +++ b/src/r_draw_rgba_sse.h @@ -84,6 +84,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -121,6 +122,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) @@ -184,6 +186,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) @@ -217,6 +220,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while 
(sse_count--) @@ -277,6 +281,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -289,6 +294,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -317,6 +323,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); while (sse_count--) { @@ -331,6 +338,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); while (sse_count--) { @@ -357,918 +365,3 @@ public: } } }; - -class VecCommand(Vlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32_t * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Vlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); 
- uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t p0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t p1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t p2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t p3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + 
local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(p3, p2, p1, p0); - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE_SIMPLE(fg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - VEC_SHADE(fg, shade_constants); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Mvlinec4RGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - uint32_t _mvlinemax; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - const uint32 * RESTRICT bufplce2[4]; - uint32_t buftexturefracx[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Mvlinec4RGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; 
- bufplce2[i] = (const uint32_t *)::bufplce2[i]; - buftexturefracx[i] = ::buftexturefracx[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light0 = LightBgra::calc_light_multiplier(palookuplight[0]); - uint32_t light1 = LightBgra::calc_light_multiplier(palookuplight[1]); - uint32_t light2 = LightBgra::calc_light_multiplier(palookuplight[2]); - uint32_t light3 = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (bufplce2[0] == nullptr) - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg 
= _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - DWORD place0 = local_vplce[0]; - DWORD place1 = local_vplce[1]; - DWORD place2 = local_vplce[2]; - DWORD place3 = local_vplce[3]; - - uint32_t pix0 = bufplce[0][((place0 >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((place1 >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((place2 >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((place3 >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = place0 + local_vince[0]; - local_vplce[1] = place1 + local_vince[1]; - local_vplce[2] = place2 + local_vince[2]; - local_vplce[3] = place3 + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - else - { - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light3, light2, light1, light0); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE_SIMPLE(fg); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light3, light2, light1, light0, shade_constants); - do - { - __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, bufplce, bufplce2, buftexturefracx, 
local_vplce, half, height); - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - VEC_SHADE(fg, shade_constants); - VEC_ALPHA_BLEND(fg, bg); - _mm_storeu_si128((__m128i*)dest, fg); - dest += pitch; - } while (--count); - } - } - } -}; - -class VecCommand(Tmvline4AddRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 * RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 
8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - 
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4AddClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4AddClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; 
i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - 
local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = 
_mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_adds_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4SubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4SubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[i]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> 
(FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], 
shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_hi, bg_alpha_hi), _mm_mullo_epi16(fg_hi, fg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(bg_lo, bg_alpha_lo), _mm_mullo_epi16(fg_lo, fg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; - -class VecCommand(Tmvline4RevSubClampRGBA) : public DrawerCommand -{ - BYTE * RESTRICT _dest; - int _count; - int _pitch; - ShadeConstants _shade_constants; - fixed_t _srcalpha; - fixed_t _destalpha; - fixed_t palookuplight[4]; - DWORD vplce[4]; - DWORD vince[4]; - const uint32 *RESTRICT bufplce[4]; - uint32_t bufheight[4]; - -public: - VecCommand(Tmvline4RevSubClampRGBA)() - { - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha; - _destalpha = 
dc_destalpha; - for (int i = 0; i < 4; i++) - { - palookuplight[i] = ::palookuplight[i]; - vplce[i] = ::vplce[i]; - vince[i] = ::vince[i]; - bufplce[i] = (const uint32 *)::bufplce[i]; - bufheight[i] = ::bufheight[4]; - } - } - - void Execute(DrawerThread *thread) override - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, (uint32_t*)_dest); - int pitch = _pitch * thread->num_cores; - - uint32_t height[4]; - uint32_t half[4]; - for (int i = 0; i < 4; i++) - { - height[i] = bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; - } - - uint32_t light[4]; - light[0] = LightBgra::calc_light_multiplier(palookuplight[0]); - light[1] = LightBgra::calc_light_multiplier(palookuplight[1]); - light[2] = LightBgra::calc_light_multiplier(palookuplight[2]); - light[3] = LightBgra::calc_light_multiplier(palookuplight[3]); - - ShadeConstants shade_constants = _shade_constants; - - uint32_t src_alpha = _srcalpha >> (FRACBITS - 8); - uint32_t dest_alpha = _destalpha >> (FRACBITS - 8); - - DWORD local_vplce[4] = { vplce[0], vplce[1], vplce[2], vplce[3] }; - DWORD local_vince[4] = { vince[0], vince[1], vince[2], vince[3] }; - int skipped = thread->skipped_by_thread(_dest_y); - for (int i = 0; i < 4; i++) - { - local_vplce[i] += local_vince[i] * skipped; - local_vince[i] *= thread->num_cores; - } - - if (shade_constants.simple_shade) - { - VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = 
local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE_SIMPLE(fg); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - else - { - VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); - VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha); - - do - { - uint32_t pix0 = bufplce[0][((local_vplce[0] >> FRACBITS) * height[0]) >> FRACBITS]; - uint32_t pix1 = bufplce[1][((local_vplce[1] >> FRACBITS) * height[1]) >> FRACBITS]; - uint32_t pix2 = bufplce[2][((local_vplce[2] >> FRACBITS) * height[2]) >> FRACBITS]; - uint32_t pix3 = bufplce[3][((local_vplce[3] >> FRACBITS) * height[3]) >> FRACBITS]; - - local_vplce[0] = local_vplce[0] + local_vince[0]; - local_vplce[1] = local_vplce[1] + local_vince[1]; - local_vplce[2] = local_vplce[2] + local_vince[2]; - local_vplce[3] = local_vplce[3] + local_vince[3]; - - __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); - VEC_CALC_BLEND_ALPHA(fg); - VEC_SHADE(fg, shade_constants); - - __m128i bg = _mm_loadu_si128((const __m128i*)dest); - - __m128i fg_hi = _mm_unpackhi_epi8(fg, _mm_setzero_si128()); - __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi8(bg, 
_mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); - - __m128i out_hi = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_hi, fg_alpha_hi), _mm_mullo_epi16(bg_hi, bg_alpha_hi)), 8); - __m128i out_lo = _mm_srli_epi16(_mm_subs_epu16(_mm_mullo_epi16(fg_lo, fg_alpha_lo), _mm_mullo_epi16(bg_lo, bg_alpha_lo)), 8); - __m128i out = _mm_packus_epi16(out_lo, out_hi); - - _mm_storeu_si128((__m128i*)dest, out); - dest += pitch; - } while (--count); - } - } -}; diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 82932b1f2..45bd5c029 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -53,8 +53,13 @@ extern unsigned int *horizspan[4]; #ifndef NO_SSE +#ifdef _MSC_VER +#pragma warning(disable: 4101) // warning C4101: unreferenced local variable +#endif + // Generate SSE drawers: #define VecCommand(name) name##_SSE_Command +#define VEC_SHADE_VARS SSE_SHADE_VARS #define VEC_SHADE_SIMPLE_INIT SSE_SHADE_SIMPLE_INIT #define VEC_SHADE_SIMPLE_INIT4 SSE_SHADE_SIMPLE_INIT4 #define VEC_SHADE_SIMPLE SSE_SHADE_SIMPLE diff --git a/src/r_drawt_rgba_sse.h b/src/r_drawt_rgba_sse.h index 64a77e288..7a02f2282 100644 --- a/src/r_drawt_rgba_sse.h +++ b/src/r_drawt_rgba_sse.h @@ -60,6 +60,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); if (count & 1) { @@ -110,6 +111,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); if (count & 1) { @@ -218,6 +220,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -254,6 +257,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -421,6 +425,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i 
mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -457,6 +462,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -547,6 +553,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -583,6 +590,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -673,6 +681,7 @@ public: if (shade_constants.simple_shade) { + VEC_SHADE_VARS(); VEC_SHADE_SIMPLE_INIT(light); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); @@ -709,6 +718,7 @@ public: } else { + VEC_SHADE_VARS(); VEC_SHADE_INIT(light, shade_constants); __m128i mfg_alpha = _mm_set_epi16(256, fg_alpha, fg_alpha, fg_alpha, 256, fg_alpha, fg_alpha, fg_alpha); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 630d64da0..870d74894 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1146,15 +1146,16 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof int mip_height = texture->GetHeight(); if (r_mipmap && texture->Mipmapped()) { + uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); int level = (int)MAX(magnitude - 1.0, 0.0); while (level != 0) { mipmap_offset += mip_width * mip_height; - xoffset >>= 1; level >>= 1; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } + xoffset = (xpos >> FRACBITS) * mip_width; } const uint32_t *pixels = texture->GetPixelsBgra() + mipmap_offset; From 8f38d3af990c5e9373f109781add7448f2de3c9f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 10:49:15 +0200 Subject: [PATCH 
082/100] Replaced the bicubic interpolation filter with a simple sharpening filter --- src/CMakeLists.txt | 1 - src/r_draw_rgba.cpp | 4 + src/textures/bicubic_interpolation.cpp | 107 --------------------- src/textures/bicubic_interpolation.h | 50 ---------- src/textures/texture.cpp | 124 ++++++++++++++++++++++--- 5 files changed, 117 insertions(+), 169 deletions(-) delete mode 100644 src/textures/bicubic_interpolation.cpp delete mode 100644 src/textures/bicubic_interpolation.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 042da0c8f..8c0a30ea0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1190,7 +1190,6 @@ set (PCH_SOURCES textures/texturemanager.cpp textures/tgatexture.cpp textures/warptexture.cpp - textures/bicubic_interpolation.cpp thingdef/olddecorations.cpp thingdef/thingdef.cpp thingdef/thingdef_codeptr.cpp diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index fbb2c12c5..aa88e4302 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -103,7 +103,9 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); ///////////////////////////////////////////////////////////////////////////// +#ifndef NO_SSE __m128i SampleBgra::samplertable[256 * 2]; +#endif DrawerCommandQueue *DrawerCommandQueue::Instance() { @@ -113,6 +115,7 @@ DrawerCommandQueue *DrawerCommandQueue::Instance() DrawerCommandQueue::DrawerCommandQueue() { +#ifndef NO_SSE for (int inv_b = 0; inv_b < 16; inv_b++) { for (int inv_a = 0; inv_a < 16; inv_a++) @@ -132,6 +135,7 @@ DrawerCommandQueue::DrawerCommandQueue() _mm_store_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1, ainvb_invainvb); } } +#endif } DrawerCommandQueue::~DrawerCommandQueue() diff --git a/src/textures/bicubic_interpolation.cpp b/src/textures/bicubic_interpolation.cpp deleted file mode 100644 index 2c8a3049d..000000000 --- a/src/textures/bicubic_interpolation.cpp +++ /dev/null @@ -1,107 +0,0 @@ - -#include "doomtype.h" -#include "bicubic_interpolation.h" - -void 
BicubicInterpolation::ScaleImage(uint32_t *dest_data, int dest_width, int dest_height, const uint32_t *src_data, int src_width, int src_height) -{ - if (dest_width <= 0 || dest_height <= 0 || src_width <= 0 || src_height <= 0) - return; - - // Scale factor as a rational number r = n / d - int n = dest_width; - int d = src_width; - - const unsigned char *src_ptr = (const unsigned char *)src_data; - unsigned char *dest_ptr = (unsigned char *)dest_data; - - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 0, dest_width, dest_width * 4, dest_height, dest_ptr + 0); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 1, dest_width, dest_width * 4, dest_height, dest_ptr + 1); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 2, dest_width, dest_width * 4, dest_height, dest_ptr + 2); - scale(n, d, src_width, src_width * 4, src_height, src_ptr + 3, dest_width, dest_width * 4, dest_height, dest_ptr + 3); -} - -void BicubicInterpolation::scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *f, int out_width, int out_pitch, int out_height, unsigned char *g) -{ - // Implementation of Michael J. Aramini's Efficient Image Magnification by Bicubic Spline Interpolation - - int dimension_size = (out_width > out_height) ? out_width : out_height; - L_vector.resize(dimension_size); - - for (int i=0;i<4;i++) - c_vector[i].resize(dimension_size); - h_vector.resize(in_width); - - int larger_out_dimension; - int j, k, l, m, index; - int *L = &L_vector[0]; - float x; - float *c[4] = { &c_vector[0][0], &c_vector[1][0], &c_vector[2][0], &c_vector[3][0] }; - float *h = &h_vector[0]; - - larger_out_dimension = (out_width > out_height) ? 
out_width : out_height; - - for (k = 0; k < larger_out_dimension; k++) - L[k] = (k * d) / n; - - for (k = 0; k < n; k++) - { - x = (float)((k * d) % n) / (float)n; - c[0][k] = C0(x); - c[1][k] = C1(x); - c[2][k] = C2(x); - c[3][k] = C3(x); - } - for (k = n; k < larger_out_dimension; k++) - for (l = 0; l < 4; l++) - c[l][k] = c[l][k % n]; - - for (k = 0; k < out_height; k++) - { - for (j = 0; j < in_width; j++) - { - h[j] = 0.0f; - for (l = 0; l < 4; l++) - { - index = L[k] + l - 1; - if ((index >= 0) && (index < in_height)) - h[j] += f[index*in_pitch+j*4] * c[3 - l][k]; - } - } - for (m = 0; m < out_width; m++) - { - x = 0.5f; - for (l = 0; l < 4; l++) - { - index = L[m] + l - 1; - if ((index >= 0) && (index < in_width)) - x += h[index] * c[3 - l][m]; - } - if (x <= 0.0f) - g[k*out_pitch+m*4] = 0; - else if (x >= 255) - g[k*out_pitch+m*4] = 255; - else - g[k*out_pitch+m*4] = (unsigned char)x; - } - } -} - -inline float BicubicInterpolation::C0(float t) -{ - return -a * t * t * t + a * t * t; -} - -inline float BicubicInterpolation::C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; -} - -inline float BicubicInterpolation::C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; -} - -inline float BicubicInterpolation::C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; -} diff --git a/src/textures/bicubic_interpolation.h b/src/textures/bicubic_interpolation.h deleted file mode 100644 index da547ad83..000000000 --- a/src/textures/bicubic_interpolation.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -** Bicubic Image Scaler -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. 
-** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef __BICUBIC_INTERPOLATION_H__ -#define __BICUBIC_INTERPOLATION_H__ - -#pragma once - -#include - -// Bicubic image scaler -class BicubicInterpolation -{ -public: - void ScaleImage(uint32_t *dest, int dest_width, int dest_height, const uint32_t *src, int src_width, int src_height); - -private: - void scale(int n, int d, int in_width, int in_pitch, int in_height, const unsigned char *in_data, int out_width, int out_pitch, int out_height, unsigned char *out_data); - - float a = -0.5f; // a is a spline parameter such that -1 <= a <= 0 - - inline float C0(float t); - inline float C1(float t); - inline float C2(float t); - inline float C3(float t); - - std::vector L_vector; - std::vector c_vector[4]; - std::vector h_vector; -}; - -#endif diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7ff5c9ba2..ce7874ee6 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -45,7 +45,6 @@ #include "v_video.h" #include "m_fixed.h" #include "textures/textures.h" -#include "textures/bicubic_interpolation.h" #include "v_palette.h" typedef bool (*CheckFunc)(FileReader & file); @@ -383,19 +382,122 @@ int FTexture::MipmapLevels() const void FTexture::GenerateBgraMipmaps() { - BicubicInterpolation bicubic; - - uint32_t *src = PixelsBgra.data(); - uint32_t *dest = src + Width * 
Height; - int levels = MipmapLevels(); - for (int i = 1; i < levels; i++) + struct Color4f { - int w = MAX(Width >> i, 1); - int h = MAX(Height >> i, 1); + float a, r, g, b; + Color4f operator*(const Color4f &v) const { return Color4f{ a * v.a, r * v.r, g * v.g, b * v.b }; } + Color4f operator/(const Color4f &v) const { return Color4f{ a / v.a, r / v.r, g / v.g, b / v.b }; } + Color4f operator+(const Color4f &v) const { return Color4f{ a + v.a, r + v.r, g + v.g, b + v.b }; } + Color4f operator-(const Color4f &v) const { return Color4f{ a - v.a, r - v.r, g - v.g, b - v.b }; } + Color4f operator*(float s) const { return Color4f{ a * s, r * s, g * s, b * s }; } + Color4f operator/(float s) const { return Color4f{ a / s, r / s, g / s, b / s }; } + Color4f operator+(float s) const { return Color4f{ a + s, r + s, g + s, b + s }; } + Color4f operator-(float s) const { return Color4f{ a - s, r - s, g - s, b - s }; } + }; - bicubic.ScaleImage(dest, h, w, src, Height, Width); + int levels = MipmapLevels(); + std::vector image(PixelsBgra.size()); - dest += w * h; + // Convert to normalized linear colorspace + { + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t c8 = PixelsBgra[x * Height + y]; + Color4f c; + c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + image[x * Height + y] = c; + } + } + } + + // Generate mipmaps + { + std::vector smoothed(Width * Height); + Color4f *src = image.data(); + Color4f *dest = src + Width * Height; + for (int i = 1; i < levels; i++) + { + int srcw = MAX(Width >> (i - 1), 1); + int srch = MAX(Height >> (i - 1), 1); + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + + // Downscale + for (int x = 0; x < w; x++) + { + int sx0 = x * 2; + int sx1 = MIN((x + 1) * 2, srcw - 1); + for (int y = 0; y < h; y++) + { + int sy0 = y * 2; + int sy1 = 
MIN((y + 1) * 2, srch - 1); + + Color4f src00 = src[sy0 + sx0 * srch]; + Color4f src01 = src[sy1 + sx0 * srch]; + Color4f src10 = src[sy0 + sx1 * srch]; + Color4f src11 = src[sy1 + sx1 * srch]; + Color4f c = (src00 + src01 + src10 + src11) * 0.25f; + + dest[y + x * h] = src00; + } + } + + // Sharpen filter with a 3x3 kernel: + for (int x = 0; x < w; x++) + { + for (int y = 0; y < h; y++) + { + Color4f c = { 0.0f, 0.0f, 0.0f, 0.0f }; + for (int kx = -1; kx < 2; kx++) + { + for (int ky = -1; ky < 2; ky++) + { + int a = y + ky; + int b = x + kx; + if (a < 0) a = h - 1; + if (a == h) a = 0; + if (b < 0) b = w - 1; + if (b == h) b = 0; + c = c + dest[a + b * h]; + } + } + c = c * (1.0f / 9.0f); + smoothed[y + x * h] = c; + } + } + float k = 0.04f; + for (int j = 0; j < w * h; j++) + dest[j] = dest[j] + (dest[j] - smoothed[j]) * k; + + src = dest; + dest += w * h; + } + } + + // Convert to bgra8 sRGB colorspace + { + Color4f *src = image.data() + Width * Height; + uint32_t *dest = PixelsBgra.data() + Width * Height; + for (int i = 1; i < levels; i++) + { + int w = MAX(Width >> i, 1); + int h = MAX(Height >> i, 1); + for (int j = 0; j < w * h; j++) + { + uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + dest[j] = (a << 24) | (r << 16) | (g << 8) | b; + } + src += w * h; + dest += w * h; + } } } From 200d357b0d1f609ce67fdb23c03c77836285f0e3 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:43:24 +0200 Subject: [PATCH 083/100] Linear filtering bug fix --- src/r_draw_rgba.cpp | 16 ++++++++-------- src/r_draw_rgba.h | 40 ++++++++++++++++++---------------------- src/r_segs.cpp | 4 ++-- 3 files changed, 28 insertions(+), 32 
deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index aa88e4302..7a071e1d4 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1436,7 +1436,7 @@ public: uint32_t frac; uint32_t texturefracx; uint32_t height; - uint32_t half; + uint32_t one; LoopIterator(DrawerWall1Command *command, DrawerThread *thread) { @@ -1451,7 +1451,7 @@ public: pitch = command->_pitch * thread->num_cores; height = command->_textureheight; - half = (0x80000000 + height - 1) / height; + one = ((0x80000000 + height - 1) / height) * 2 + 1; } explicit operator bool() @@ -1520,7 +1520,7 @@ public: uint32_t vplce[4]; uint32_t vince[4]; uint32_t height[4]; - uint32_t half[4]; + uint32_t one[4]; LoopIterator(DrawerWall4Command *command, DrawerThread *thread) { @@ -1537,7 +1537,7 @@ public: vplce[i] = command->_vplce[i] + command->_vince[i] * skipped; vince[i] = command->_vince[i] * thread->num_cores; height[i] = command->_bufheight[i]; - half[i] = (0x80000000 + height[i] - 1) / height[i]; + one[i] = ((0x80000000 + height[i] - 1) / height[i]) * 2 + 1; } } @@ -1574,7 +1574,7 @@ public: { FORCEINLINE static uint32_t Sample1(DrawerWall4Command &cmd, LoopIterator &loop, int index) { - return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.half[index], loop.height[index]); + return SampleBgra::sample_bilinear(cmd._bufplce[index], cmd._bufplce2[index], cmd._buftexturefracx[index], loop.vplce[index], loop.one[index], loop.height[index]); } }; #else @@ -1591,7 +1591,7 @@ public: FORCEINLINE static __m128i Sample4(DrawerWall4Command &cmd, LoopIterator &loop) { __m128i fg; - VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.half, loop.height); + VEC_SAMPLE_BILINEAR4_COLUMN(fg, cmd._bufplce, cmd._bufplce2, cmd._buftexturefracx, loop.vplce, loop.one, loop.height); return fg; } }; @@ -2021,7 +2021,7 @@ public: { do { - uint32_t fg = 
LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::copy(fg); } while (loop.next()); } @@ -2048,7 +2048,7 @@ public: { do { - uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.half, loop.height), _light, _shade_constants); + uint32_t fg = LightBgra::shade_bgra(SampleBgra::sample_bilinear(_source, _source2, loop.texturefracx, loop.frac, loop.one, loop.height), _light, _shade_constants); *loop.dest = BlendBgra::alpha_blend(fg, *loop.dest); } while (loop.next()); } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 53572c88b..27d7bd035 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -502,10 +502,10 @@ public: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } - FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t half, uint32_t height) + FORCEINLINE static uint32_t sample_bilinear(const uint32_t *col0, const uint32_t *col1, uint32_t texturefracx, uint32_t texturefracy, uint32_t one, uint32_t height) { - uint32_t frac_y0 = ((texturefracy - half) >> FRACBITS) * height; - uint32_t frac_y1 = ((texturefracy + half) >> FRACBITS) * height; + uint32_t frac_y0 = (texturefracy >> FRACBITS) * height; + uint32_t frac_y1 = ((texturefracy + one) >> FRACBITS) * height; uint32_t y0 = frac_y0 >> FRACBITS; uint32_t y1 = frac_y1 >> FRACBITS; @@ -533,18 +533,16 @@ public: int yshift = (32 - ybits); int xmask = (1 << xshift) - 1; int ymask = (1 << yshift) - 1; - uint32_t xhalf = 1 << (xbits - 1); - uint32_t yhalf = 1 << (ybits - 1); - uint32_t x = (xfrac - xhalf) >> xbits; - uint32_t y = (yfrac - yhalf) >> ybits; + 
uint32_t x = xfrac >> xbits; + uint32_t y = yfrac >> ybits; uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; uint32_t a = 16 - inv_a; uint32_t b = 16 - inv_b; @@ -564,11 +562,11 @@ public: ///////////////////////////////////////////////////////////////////////////// // SSE/AVX shading macros: -#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, half, height, texturefracx) \ +#define AVX2_SAMPLE_BILINEAR4_COLUMN_INIT(col0, col1, one, height, texturefracx) \ const uint32_t *baseptr = col0[0]; \ __m128i coloffsets0 = _mm_setr_epi32(col0[0] - baseptr, col0[1] - baseptr, col0[2] - baseptr, col0[3] - baseptr); \ __m128i coloffsets1 = _mm_setr_epi32(col1[0] - baseptr, col1[1] - baseptr, col1[2] - baseptr, col1[3] - baseptr); \ - __m128i mhalf = _mm_loadu_si128((const __m128i*)half); \ + __m128i mone = _mm_loadu_si128((const __m128i*)one); \ __m128i m127 = _mm_set1_epi16(127); \ __m128i m16 = _mm_set1_epi32(16); \ __m128i m15 = _mm_set1_epi32(15); \ @@ -577,8 +575,8 @@ public: #define AVX2_SAMPLE_BILINEAR4_COLUMN(fg, texturefracy) { \ __m128i mtexturefracy = _mm_loadu_si128((const __m128i*)texturefracy); \ - __m128i multmp0 = _mm_srli_epi32(_mm_sub_epi32(mtexturefracy, mhalf), FRACBITS); \ - __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mhalf), FRACBITS); \ + __m128i multmp0 = _mm_srli_epi32(mtexturefracy, FRACBITS); \ + __m128i multmp1 = _mm_srli_epi32(_mm_add_epi32(mtexturefracy, mone), FRACBITS); \ __m128i frac_y0 = _mm_or_si128(_mm_mul_epu32(multmp0, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp0, 4), 
_mm_srli_si128(mheight, 4)), 4)); \ __m128i frac_y1 = _mm_or_si128(_mm_mul_epu32(multmp1, mheight), _mm_slli_si128(_mm_mul_epu32(_mm_srli_si128(multmp1, 4), _mm_srli_si128(mheight, 4)), 4)); \ __m128i y0 = _mm_srli_epi32(frac_y0, FRACBITS); \ @@ -624,13 +622,13 @@ public: fg = _mm_packus_epi16(fg_lo, fg_hi); \ } -#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, half, height) { \ +#define VEC_SAMPLE_BILINEAR4_COLUMN(fg, col0, col1, texturefracx, texturefracy, one, height) { \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t frac_y0 = ((texturefracy[i] - half[i]) >> FRACBITS) * height[i]; \ - uint32_t frac_y1 = ((texturefracy[i] + half[i]) >> FRACBITS) * height[i]; \ + uint32_t frac_y0 = (texturefracy[i] >> FRACBITS) * height[i]; \ + uint32_t frac_y1 = ((texturefracy[i] + one[i]) >> FRACBITS) * height[i]; \ uint32_t y0 = (frac_y0 >> FRACBITS); \ uint32_t y1 = (frac_y1 >> FRACBITS); \ \ @@ -676,23 +674,21 @@ public: int yshift = (32 - ybits); \ int xmask = (1 << xshift) - 1; \ int ymask = (1 << yshift) - 1; \ - uint32_t xhalf = 1 << (xbits - 1); \ - uint32_t yhalf = 1 << (ybits - 1); \ \ __m128i m127 = _mm_set1_epi16(127); \ fg = _mm_setzero_si128(); \ for (int i = 0; i < 4; i++) \ { \ - uint32_t x = (xfrac - xhalf) >> xbits; \ - uint32_t y = (yfrac - yhalf) >> ybits; \ + uint32_t x = xfrac >> xbits; \ + uint32_t y = yfrac >> ybits; \ \ uint32_t p00 = texture[(y & ymask) + ((x & xmask) << yshift)]; \ uint32_t p01 = texture[((y + 1) & ymask) + ((x & xmask) << yshift)]; \ uint32_t p10 = texture[(y & ymask) + (((x + 1) & xmask) << yshift)]; \ uint32_t p11 = texture[((y + 1) & ymask) + (((x + 1) & xmask) << yshift)]; \ \ - uint32_t inv_b = ((xfrac + xhalf) >> (xbits - 4)) & 15; \ - uint32_t inv_a = ((yfrac + yhalf) >> (ybits - 4)) & 15; \ + uint32_t inv_b = (xfrac >> (xbits - 4)) & 15; \ + uint32_t inv_a = (yfrac >> (ybits - 4)) & 15; \ \ __m128i ab_invab = 
_mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2); \ __m128i ainvb_invainvb = _mm_load_si128(SampleBgra::samplertable + inv_b * 32 + inv_a * 2 + 1); \ diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 870d74894..96bb1f948 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1173,14 +1173,14 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof } else { - int tx0 = ((xoffset - FRACUNIT / 2) >> FRACBITS) % mip_width; + int tx0 = (xoffset >> FRACBITS) % mip_width; if (tx0 < 0) tx0 += mip_width; int tx1 = (tx0 + 1) % mip_width; source = (BYTE*)(pixels + tx0 * mip_height); source2 = (BYTE*)(pixels + tx1 * mip_height); height = mip_height; - texturefracx = ((xoffset + FRACUNIT / 2) >> (FRACBITS - 4)) & 15; + texturefracx = (xoffset >> (FRACBITS - 4)) & 15; } } } From 7a65a0f5953c33ea32ab4600064541e82603a8be Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 27 Jun 2016 11:57:27 +0200 Subject: [PATCH 084/100] Made mipmapping a little less aggressive --- src/r_segs.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 96bb1f948..2d39a6d97 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -1147,11 +1147,12 @@ WallscanSampler::WallscanSampler(int y1, float swal, double yrepeat, fixed_t xof if (r_mipmap && texture->Mipmapped()) { uint32_t xpos = (uint32_t)((((uint64_t)xoffset) << FRACBITS) / mip_width); - int level = (int)MAX(magnitude - 1.0, 0.0); - while (level != 0) + double texture_bias = 1.7f; + double level = MAX(magnitude - 3.0, 0.0); + while (level > texture_bias) { mipmap_offset += mip_width * mip_height; - level >>= 1; + level *= 0.5f; mip_width = MAX(mip_width >> 1, 1); mip_height = MAX(mip_height >> 1, 1); } From d1617fcdf08ddc4f03dca8d92195261bf7dcc4ef Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:45:06 +0200 Subject: [PATCH 085/100] GCC compile fixes --- src/r_draw_rgba.h | 4 ++++ src/textures/texture.cpp | 16 
++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 27d7bd035..96e96530c 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -31,6 +31,10 @@ #include #include +#ifndef NO_SSE +#include +#endif + ///////////////////////////////////////////////////////////////////////////// // Drawer functions: diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index ce7874ee6..160223617 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -406,10 +406,10 @@ void FTexture::GenerateBgraMipmaps() { uint32_t c8 = PixelsBgra[x * Height + y]; Color4f c; - c.a = std::pow(APART(c8) * (1.0f / 255.0f), 2.2f); - c.r = std::pow(RPART(c8) * (1.0f / 255.0f), 2.2f); - c.g = std::pow(GPART(c8) * (1.0f / 255.0f), 2.2f); - c.b = std::pow(BPART(c8) * (1.0f / 255.0f), 2.2f); + c.a = powf(APART(c8) * (1.0f / 255.0f), 2.2f); + c.r = powf(RPART(c8) * (1.0f / 255.0f), 2.2f); + c.g = powf(GPART(c8) * (1.0f / 255.0f), 2.2f); + c.b = powf(BPART(c8) * (1.0f / 255.0f), 2.2f); image[x * Height + y] = c; } } @@ -489,10 +489,10 @@ void FTexture::GenerateBgraMipmaps() int h = MAX(Height >> i, 1); for (int j = 0; j < w * h; j++) { - uint32_t a = (uint32_t)clamp(std::pow(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t r = (uint32_t)clamp(std::pow(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t g = (uint32_t)clamp(std::pow(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); - uint32_t b = (uint32_t)clamp(std::pow(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t a = (uint32_t)clamp(powf(src[j].a, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t r = (uint32_t)clamp(powf(src[j].r, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t g = (uint32_t)clamp(powf(src[j].g, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); + uint32_t b = (uint32_t)clamp(powf(src[j].b, 1.0f / 2.2f) * 255.0f + 0.5f, 0.0f, 255.0f); dest[j] = (a << 24) | (r << 16) | (g << 8) | b; 
} src += w * h; From 13ef9a834c45355ba70fb029c54170e44b54cb76 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Thu, 30 Jun 2016 13:56:53 +0200 Subject: [PATCH 086/100] Compile fix for gcc/clang --- src/r_draw_rgba.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 96e96530c..c976602f6 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -296,7 +296,7 @@ class DrawerBlendCommand : public CommandType public: void Execute(DrawerThread *thread) override { - LoopIterator loop(this, thread); + typename CommandType::LoopIterator loop(this, thread); if (!loop) return; BlendMode blend(*this, loop); do From b0e9adfc10ad40dde1c080ff8cb0e034e91cd069 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Mon, 4 Jul 2016 16:33:19 +0200 Subject: [PATCH 087/100] Fix single layer skies by using a cube box rather than a cylinder --- src/r_plane.cpp | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 0ede451e0..8345a83ce 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,14 +880,34 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; +// Treat sky as a cube rather than a cylinder +CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); + // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - if (!r_swtruecolor) - return fronttex->GetColumn((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + int tx; + if (r_cubesky) + { + int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; + int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); + tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); + tx %= fronttex->GetWidth(); + if (tx < 0) + tx += fronttex->GetWidth(); + } else - return (const BYTE *)fronttex->GetColumnBgra((UMulScale16(column, frontcyl) + frontpos) >> FRACBITS, NULL); + { + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; + } + + if (!r_swtruecolor) + return fronttex->GetColumn(tx, NULL); + else + { + return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); + } } // Get a column of sky when there are two overlapping sky textures @@ -1030,7 +1050,7 @@ static void R_DrawSky (visplane_t *pl) { // The texture does not tile nicely frontyScale *= skyscale; frontiScale = 1 / frontyScale; - R_DrawSkyStriped (pl); + //R_DrawSkyStriped (pl); } } From 19030b555f233f85334eaca0f2c8c66b91f1e577 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 6 Jul 2016 20:19:01 +0200 Subject: [PATCH 088/100] Fix sky stretching on widescreen displays --- src/r_plane.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 8345a83ce..75826d328 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,21 +880,17 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -// Treat sky as a cube rather than a cylinder -CVAR(Bool, r_cubesky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); +CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { int tx; - if (r_cubesky) + if (r_linearsky) { - int tx0 = (UMulScale16((skyangle + xtoviewangle[0]) ^ skyflip, frontcyl) + frontpos) >> FRACBITS; - int tx1 = tx0 - ((UMulScale16(xtoviewangle[0], frontcyl) * 2) >> FRACBITS); - tx = (int)(tx0 + (tx1 - tx0) * x / viewwidth + 0.5); - tx %= fronttex->GetWidth(); - if (tx < 0) - tx += fronttex->GetWidth(); + angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); + angle_t column = (skyangle + xangle) ^ skyflip; + tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; } else { From 21390e91b8a28c71ba44bf62ee3c7545508a74e2 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 21:04:45 +0200 Subject: [PATCH 089/100] Remove linear sky again --- src/r_plane.cpp | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/r_plane.cpp b/src/r_plane.cpp index e25812fbd..c751fc5dc 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -880,30 +880,16 @@ static DWORD lastskycol_bgra[4]; static int skycolplace; static int skycolplace_bgra; -CVAR(Bool, r_linearsky, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); - // Get a column of sky when there is only one sky texture. 
static const BYTE *R_GetOneSkyColumn (FTexture *fronttex, int x) { - int tx; - if (r_linearsky) - { - angle_t xangle = (angle_t)((0.5 - x / (double)viewwidth) * FocalTangent * ANGLE_90); - angle_t column = (skyangle + xangle) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } - else - { - angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; - tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; - } + angle_t column = (skyangle + xtoviewangle[x]) ^ skyflip; + int tx = (UMulScale16(column, frontcyl) + frontpos) >> FRACBITS; if (!r_swtruecolor) return fronttex->GetColumn(tx, NULL); else - { return (const BYTE *)fronttex->GetColumnBgra(tx, NULL); - } } // Get a column of sky when there are two overlapping sky textures From 7000d0ccf9a97a01ab74853ea571d753e6e252b0 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 22:59:16 +0200 Subject: [PATCH 090/100] Change GetPixelsBgra to use CopyTrueColorPixels --- src/g_strife/strife_sbar.cpp | 11 --- src/menu/playerdisplay.cpp | 11 --- src/textures/jpegtexture.cpp | 109 ------------------------- src/textures/pngtexture.cpp | 151 ----------------------------------- src/textures/texture.cpp | 46 ++++++++--- src/textures/textures.h | 1 + src/textures/warptexture.cpp | 14 +++- 7 files changed, 45 insertions(+), 298 deletions(-) diff --git a/src/g_strife/strife_sbar.cpp b/src/g_strife/strife_sbar.cpp index e1fcb3cda..eb3fa2608 100644 --- a/src/g_strife/strife_sbar.cpp +++ b/src/g_strife/strife_sbar.cpp @@ -34,7 +34,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra() override; bool CheckModified (); void SetVial (int level); @@ -116,16 +115,6 @@ const BYTE *FHealthBar::GetPixels () return Pixels; } -const uint32_t *FHealthBar::GetPixelsBgra() -{ - if (NeedRefresh) - { - MakeTexture(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - void FHealthBar::SetVial (int level) { 
if (level < 0) diff --git a/src/menu/playerdisplay.cpp b/src/menu/playerdisplay.cpp index 7b7e9ca5d..16671975a 100644 --- a/src/menu/playerdisplay.cpp +++ b/src/menu/playerdisplay.cpp @@ -78,7 +78,6 @@ public: const BYTE *GetColumn(unsigned int column, const Span **spans_out); const BYTE *GetPixels(); - const uint32_t *GetPixelsBgra() override; bool CheckModified(); protected: @@ -247,16 +246,6 @@ const BYTE *FBackdropTexture::GetPixels() return Pixels; } -const uint32_t *FBackdropTexture::GetPixelsBgra() -{ - if (LastRenderTic != gametic) - { - Render(); - PixelsBgra.clear(); - } - return FTexture::GetPixelsBgra(); -} - //============================================================================= // // This is one plasma and two rotozoomers. I think it turned out quite awesome. diff --git a/src/textures/jpegtexture.cpp b/src/textures/jpegtexture.cpp index f44b34d08..fc629b37e 100644 --- a/src/textures/jpegtexture.cpp +++ b/src/textures/jpegtexture.cpp @@ -187,7 +187,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -199,7 +198,6 @@ protected: Span DummySpans[2]; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -358,15 +356,6 @@ const BYTE *FJPEGTexture::GetPixels () return Pixels; } -const uint32_t *FJPEGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // // @@ -466,104 +455,6 @@ void FJPEGTexture::MakeTexture () } } -void FJPEGTexture::MakeTextureBgra() -{ - FWadLump lump = Wads.OpenLumpNum(SourceLump); - JSAMPLE *buff = NULL; - - jpeg_decompress_struct cinfo; - jpeg_error_mgr jerr; - - CreatePixelsBgraWithMipmaps(); - - cinfo.err = jpeg_std_error(&jerr); - 
cinfo.err->output_message = JPEG_OutputMessage; - cinfo.err->error_exit = JPEG_ErrorExit; - jpeg_create_decompress(&cinfo); - try - { - FLumpSourceMgr sourcemgr(&lump, &cinfo); - jpeg_read_header(&cinfo, TRUE); - if (!((cinfo.out_color_space == JCS_RGB && cinfo.num_components == 3) || - (cinfo.out_color_space == JCS_CMYK && cinfo.num_components == 4) || - (cinfo.out_color_space == JCS_GRAYSCALE && cinfo.num_components == 1))) - { - Printf(TEXTCOLOR_ORANGE "Unsupported color format\n"); - throw - 1; - } - - jpeg_start_decompress(&cinfo); - - int y = 0; - buff = new BYTE[cinfo.output_width * cinfo.output_components]; - - while (cinfo.output_scanline < cinfo.output_height) - { - int num_scanlines = jpeg_read_scanlines(&cinfo, &buff, 1); - BYTE *in = buff; - uint32_t *out = PixelsBgra.data() + y; - switch (cinfo.out_color_space) - { - case JCS_RGB: - for (int x = Width; x > 0; --x) - { - uint32_t r = in[0]; - uint32_t g = in[1]; - uint32_t b = in[2]; - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 3; - } - break; - - case JCS_GRAYSCALE: - for (int x = Width; x > 0; --x) - { - uint32_t gray = in[0]; - *out = 0xff000000 | (gray << 16) | (gray << 8) | gray; - out += Height; - in += 1; - } - break; - - case JCS_CMYK: - // What are you doing using a CMYK image? :) - for (int x = Width; x > 0; --x) - { - // To be precise, these calculations should use 255, but - // 256 is much faster and virtually indistinguishable. - uint32_t r = in[3] - (((256 - in[0])*in[3]) >> 8); - uint32_t g = in[3] - (((256 - in[1])*in[3]) >> 8); - uint32_t b = in[3] - (((256 - in[2])*in[3]) >> 8); - *out = 0xff000000 | (r << 16) | (g << 8) | b; - out += Height; - in += 4; - } - break; - - default: - // The other colorspaces were considered above and discarded, - // but GCC will complain without a default for them here. 
- break; - } - y++; - } - jpeg_finish_decompress(&cinfo); - jpeg_destroy_decompress(&cinfo); - } - catch (int) - { - Printf(TEXTCOLOR_ORANGE " in texture %s\n", Name.GetChars()); - jpeg_destroy_decompress(&cinfo); - } - if (buff != NULL) - { - delete[] buff; - } - - GenerateBgraMipmaps(); -} - //=========================================================================== // diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index ee4eabe90..31d76f567 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -57,7 +57,6 @@ public: const BYTE *GetColumn (unsigned int column, const Span **spans_out); const BYTE *GetPixels (); - const uint32_t *GetPixelsBgra (); void Unload (); FTextureFormat GetFormat (); int CopyTrueColorPixels(FBitmap *bmp, int x, int y, int rotate, FCopyInfo *inf = NULL); @@ -81,7 +80,6 @@ protected: DWORD StartOfIDAT; void MakeTexture (); - void MakeTextureBgra (); friend class FTexture; }; @@ -454,15 +452,6 @@ const BYTE *FPNGTexture::GetPixels () return Pixels; } -const uint32_t *FPNGTexture::GetPixelsBgra() -{ - if (PixelsBgra.empty()) - { - MakeTextureBgra(); - } - return PixelsBgra.data(); -} - //========================================================================== // @@ -620,146 +609,6 @@ void FPNGTexture::MakeTexture () delete lump; } -void FPNGTexture::MakeTextureBgra () -{ - FileReader *lump; - - if (SourceLump >= 0) - { - lump = new FWadLump(Wads.OpenLumpNum(SourceLump)); - } - else - { - lump = new FileReader(SourceFile.GetChars()); - } - - CreatePixelsBgraWithMipmaps(); - if (StartOfIDAT != 0) - { - DWORD len, id; - lump->Seek (StartOfIDAT, SEEK_SET); - lump->Read(&len, 4); - lump->Read(&id, 4); - - if (ColorType == 0 || ColorType == 3) /* Grayscale and paletted */ - { - std::vector src(Width*Height); - M_ReadIDAT (lump, src.data(), Width, Height, Width, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - - if (!PngPalette.empty()) - { - for (int x = 0; x < Width; x++) - { - for 
(int y = 0; y < Height; y++) - { - uint32_t r = PngPalette[src[x + y * Width] * 3 + 0]; - uint32_t g = PngPalette[src[x + y * Width] * 3 + 1]; - uint32_t b = PngPalette[src[x + y * Width] * 3 + 2]; - PixelsBgra[x * Height + y] = 0xff000000 | (r << 16) | (g << 8) | b; - } - } - } - else - { - for (int x = 0; x < Width; x++) - { - for (int y = 0; y < Height; y++) - { - uint32_t gray = src[x + y * Width]; - PixelsBgra[x * Height + y] = 0xff000000 | (gray << 16) | (gray << 8) | gray; - } - } - } - } - else /* RGB and/or Alpha present */ - { - int bytesPerPixel = ColorType == 2 ? 3 : ColorType == 4 ? 2 : 4; - BYTE *tempix = new BYTE[Width * Height * bytesPerPixel]; - BYTE *in; - uint32_t *out; - int x, y, pitch, backstep; - - M_ReadIDAT (lump, tempix, Width, Height, Width*bytesPerPixel, BitDepth, ColorType, Interlace, BigLong((unsigned int)len)); - in = tempix; - out = PixelsBgra.data(); - - // Convert from source format to paletted, column-major. - // Formats with alpha maps are reduced to only 1 bit of alpha. 
- switch (ColorType) - { - case 2: // RGB - pitch = Width * 3; - backstep = Height * pitch - 3; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - if (!HaveTrans) - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - else - { - if (in[0] == NonPaletteTrans[0] && - in[1] == NonPaletteTrans[1] && - in[2] == NonPaletteTrans[2]) - { - *out++ = 0; - } - else - { - *out++ = 0xff000000 | (((uint32_t)in[0]) << 16) | (((uint32_t)in[1]) << 8) | ((uint32_t)in[2]); - } - } - in += pitch; - } - in -= backstep; - } - break; - - case 4: // Grayscale + Alpha - pitch = Width * 2; - backstep = Height * pitch - 2; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[1]; - uint32_t gray = (in[0] * alpha + 127) / 255; - *out++ = (alpha << 24) | (gray << 16) | (gray << 8) | gray; - in += pitch; - } - in -= backstep; - } - break; - - case 6: // RGB + Alpha - pitch = Width * 4; - backstep = Height * pitch - 4; - for (x = Width; x > 0; --x) - { - for (y = Height; y > 0; --y) - { - // output as premultiplied alpha - uint32_t alpha = in[3]; - uint32_t red = (in[0] * alpha + 127) / 255; - uint32_t green = (in[1] * alpha + 127) / 255; - uint32_t blue = (in[2] * alpha + 127) / 255; - *out++ = (alpha << 24) | (red << 16) | (green << 8) | blue; - in += pitch; - } - in -= backstep; - } - break; - } - delete[] tempix; - } - } - delete lump; - GenerateBgraMipmaps(); -} - //=========================================================================== // // FPNGTexture::CopyTrueColorPixels diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 160223617..7dfe04b23 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -194,21 +194,15 @@ const uint32_t *FTexture::GetColumnBgra(unsigned int column, const Span **spans_ const uint32_t *FTexture::GetPixelsBgra() { - if (PixelsBgra.empty()) + if (PixelsBgra.empty() || 
CheckModified()) { - GetColumn(0, nullptr); - const BYTE *indices = GetPixels(); - if (indices == nullptr) + if (!GetColumn(0, nullptr)) return nullptr; - CreatePixelsBgraWithMipmaps(); - for (int i = 0; i < Width * Height; i++) - { - if (indices[i] != 0) - PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; - else - PixelsBgra[i] = 0; - } - GenerateBgraMipmaps(); + + FBitmap bitmap; + bitmap.Create(GetWidth(), GetHeight()); + CopyTrueColorPixels(&bitmap, 0, 0); + GenerateBgraFromBitmap(bitmap); } return PixelsBgra.data(); } @@ -356,6 +350,32 @@ void FTexture::FreeSpans (Span **spans) const M_Free (spans); } +void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) +{ + CreatePixelsBgraWithMipmaps(); + + // Transpose and premultiply alpha + const uint32_t *src = (const uint32_t *)bitmap.GetPixels(); + uint32_t *dest = PixelsBgra.data(); + for (int x = 0; x < Width; x++) + { + for (int y = 0; y < Height; y++) + { + uint32_t p = src[x + y * Width]; + uint32_t red = RPART(p); + uint32_t green = GPART(p); + uint32_t blue = BPART(p); + uint32_t alpha = APART(p); + red = (red * alpha + 127) / 255; + green = (green * alpha + 127) / 255; + blue = (blue * alpha + 127) / 255; + dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + } + } + + GenerateBgraMipmaps(); +} + void FTexture::CreatePixelsBgraWithMipmaps() { int levels = MipmapLevels(); diff --git a/src/textures/textures.h b/src/textures/textures.h index ff1093a49..e5ecdc679 100644 --- a/src/textures/textures.h +++ b/src/textures/textures.h @@ -274,6 +274,7 @@ protected: std::vector PixelsBgra; + void GenerateBgraFromBitmap(const FBitmap &bitmap); void CreatePixelsBgraWithMipmaps(); void GenerateBgraMipmaps(); void GenerateBgraMipmapsFast(); diff --git a/src/textures/warptexture.cpp b/src/textures/warptexture.cpp index 0d18ab58f..91c7b9fc4 100644 --- a/src/textures/warptexture.cpp +++ b/src/textures/warptexture.cpp @@ -39,6 +39,7 @@ #include "r_utility.h" #include 
"textures/textures.h" #include "warpbuffer.h" +#include "v_palette.h" FWarpTexture::FWarpTexture (FTexture *source, int warptype) @@ -96,13 +97,20 @@ const BYTE *FWarpTexture::GetPixels () const uint32_t *FWarpTexture::GetPixelsBgra() { DWORD time = r_FrameTime; - if (Pixels == NULL || time != GenTime) { MakeTexture(time); - PixelsBgra.clear(); + CreatePixelsBgraWithMipmaps(); + for (int i = 0; i < Width * Height; i++) + { + if (Pixels[i] != 0) + PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[Pixels[i]].d; + else + PixelsBgra[i] = 0; + } + GenerateBgraMipmapsFast(); } - return FTexture::GetPixelsBgra(); + return PixelsBgra.data(); } const BYTE *FWarpTexture::GetColumn (unsigned int column, const Span **spans_out) From 2f512e54cdee4c1d15f632fcab64d3850004028b Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 6 Aug 2016 23:12:34 +0200 Subject: [PATCH 091/100] Remove unused code --- src/textures/pngtexture.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/textures/pngtexture.cpp b/src/textures/pngtexture.cpp index 31d76f567..9a64bac61 100644 --- a/src/textures/pngtexture.cpp +++ b/src/textures/pngtexture.cpp @@ -41,7 +41,6 @@ #include "bitmap.h" #include "v_palette.h" #include "textures/textures.h" -#include //========================================================================== // @@ -74,7 +73,6 @@ protected: bool HaveTrans; WORD NonPaletteTrans[3]; - std::vector PngPalette; BYTE *PaletteMap; int PaletteSize; DWORD StartOfIDAT; @@ -268,12 +266,6 @@ FPNGTexture::FPNGTexture (FileReader &lump, int lumpnum, const FString &filename { lump.Seek (len - PaletteSize * 3, SEEK_CUR); } - for (i = 0; i < PaletteSize; i++) - { - PngPalette.push_back(p.pngpal[i][0]); - PngPalette.push_back(p.pngpal[i][1]); - PngPalette.push_back(p.pngpal[i][2]); - } for (i = PaletteSize - 1; i >= 0; --i) { p.palette[i] = MAKERGB(p.pngpal[i][0], p.pngpal[i][1], p.pngpal[i][2]); From 3c8719f9458d4f210f27e827d89ad86234717232 Mon Sep 17 00:00:00 2001 From: Magnus 
Norddahl Date: Mon, 8 Aug 2016 22:35:26 +0200 Subject: [PATCH 092/100] Fix buffer overflow in FTexture::GenerateBgraMipmaps --- src/textures/texture.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 7dfe04b23..05574e9da 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -482,7 +482,7 @@ void FTexture::GenerateBgraMipmaps() if (a < 0) a = h - 1; if (a == h) a = 0; if (b < 0) b = w - 1; - if (b == h) b = 0; + if (b == w) b = 0; c = c + dest[a + b * h]; } } From abef073ea499f85337ad688e61ac8c65c3a689ac Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 9 Aug 2016 01:17:45 +0200 Subject: [PATCH 093/100] Implemented sloped planes for true color mode --- src/r_draw_rgba.cpp | 144 +++++++++++++++++++++++++++++++++++++++----- src/r_draw_rgba.h | 3 + src/r_plane.cpp | 25 +------- 3 files changed, 134 insertions(+), 38 deletions(-) diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 7a071e1d4..69ebfeb84 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2191,23 +2191,44 @@ public: class DrawTiltedSpanRGBACommand : public DrawerCommand { - int _y; int _x1; int _x2; + int _y; BYTE * RESTRICT _destorg; fixed_t _light; ShadeConstants _shade_constants; - const BYTE * RESTRICT _source; + FVector3 _plane_sz; + FVector3 _plane_su; + FVector3 _plane_sv; + bool _plane_shade; + int _planeshade; + float _planelightfloat; + fixed_t _pviewx; + fixed_t _pviewy; + int _xbits; + int _ybits; + const uint32_t * RESTRICT _source; public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2) + DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { - _y = y; _x1 = x1; _x2 = x2; - + _y = y; _destorg = dc_destorg; - _source = ds_source; + _light = ds_light; + _shade_constants = ds_shade_constants; + _plane_sz = 
plane_sz; + _plane_su = plane_su; + _plane_sv = plane_sv; + _plane_shade = plane_shade; + _planeshade = planeshade; + _planelightfloat = planelightfloat; + _pviewx = pviewx; + _pviewy = pviewy; + _source = (const uint32_t*)ds_source; + _xbits = ds_xbits; + _ybits = ds_ybits; } void Execute(DrawerThread *thread) override @@ -2215,20 +2236,103 @@ public: if (thread->line_skipped_by_thread(_y)) return; - int y = _y; - int x1 = _x1; - int x2 = _x2; + //#define SPANSIZE 32 + //#define INVSPAN 0.03125f + //#define SPANSIZE 8 + //#define INVSPAN 0.125f + #define SPANSIZE 16 + #define INVSPAN 0.0625f - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. + int source_width = 1 << _xbits; + int source_height = 1 << _ybits; - uint32_t *source = (uint32_t*)_source; - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)_destorg; + uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg; + int count = _x2 - _x1 + 1; - int count = x2 - x1 + 1; + // Depth (Z) change across the span + double iz = _plane_sz[2] + _plane_sz[1] * (centery - _y) + _plane_sz[0] * (_x1 - centerx); + + // Light change across the span + fixed_t lightstart = _light; + fixed_t lightend = lightstart; + if (_plane_shade) + { + double vis_start = iz * _planelightfloat; + double vis_end = (iz + _plane_sz[0] * count) * _planelightfloat; + + lightstart = LIGHTSCALE(vis_start, _planeshade); + lightend = LIGHTSCALE(vis_end, _planeshade); + } + fixed_t light = lightstart; + fixed_t steplight = (lightend - lightstart) / count; + + // Texture coordinates + double uz = _plane_su[2] + _plane_su[1] * (centery - _y) + _plane_su[0] * (_x1 - centerx); + double vz = _plane_sv[2] + _plane_sv[1] * (centery - _y) + _plane_sv[0] * (_x1 - centerx); + double startz = 1.f / iz; + double startu = uz*startz; + double startv = vz*startz; + double izstep = _plane_sz[0] * SPANSIZE; + double uzstep = _plane_su[0] * SPANSIZE; + double vzstep = _plane_sv[0] * SPANSIZE; + + 
// Linear interpolate in sizes of SPANSIZE to increase speed + while (count >= SPANSIZE) + { + iz += izstep; + uz += uzstep; + vz += vzstep; + + double endz = 1.f / iz; + double endu = uz*endz; + double endv = vz*endz; + uint32_t stepu = (uint32_t)(SQWORD((endu - startu) * INVSPAN)); + uint32_t stepv = (uint32_t)(SQWORD((endv - startv) * INVSPAN)); + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + for (int i = 0; i < SPANSIZE; i++) + { + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + u += stepu; + v += stepv; + light += steplight; + } + startu = endu; + startv = endv; + count -= SPANSIZE; + } + + // The last few pixels at the end while (count > 0) { - *(dest++) = source[0]; + double endz = 1.f / iz; + startu = uz*endz; + startv = vz*endz; + uint32_t u = (uint32_t)(SQWORD(startu) + _pviewx); + uint32_t v = (uint32_t)(SQWORD(startv) + _pviewy); + + uint32_t sx = ((u >> 16) * source_width) >> 16; + uint32_t sy = ((v >> 16) * source_height) >> 16; + uint32_t fg = _source[sy + sx * source_height]; + + if (_shade_constants.simple_shade) + *(dest++) = LightBgra::shade_bgra_simple(fg, LightBgra::calc_light_multiplier(light)); + else + *(dest++) = LightBgra::shade_bgra(fg, LightBgra::calc_light_multiplier(light), _shade_constants); + + iz += _plane_sz[0]; + uz += _plane_su[0]; + vz += _plane_sv[0]; + light += steplight; count--; } } @@ -2633,6 +2737,16 @@ void R_FillSpan_rgba() DrawerCommandQueue::QueueCommand(); } +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float 
planelightfloat, fixed_t pviewx, fixed_t pviewy) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); +} + +void R_DrawColoredSpan_rgba(int y, int x1, int x2) +{ + DrawerCommandQueue::QueueCommand(y, x1, x2); +} + static ShadeConstants slab_rgba_shade_constants; static const BYTE *slab_rgba_colormap; static fixed_t slab_rgba_light; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index c976602f6..083258bf0 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -94,6 +94,9 @@ void R_DrawSpanAddClamp_rgba(); void R_DrawSpanMaskedAddClamp_rgba(); void R_FillSpan_rgba(); +void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); +void R_DrawColoredSpan_rgba(int y, int x1, int x2); + void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); diff --git a/src/r_plane.cpp b/src/r_plane.cpp index c751fc5dc..706d6fad7 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -480,23 +480,7 @@ void R_MapTiltedPlane_C (int y, int x1) void R_MapTiltedPlane_rgba (int y, int x1) { - int x2 = spanend[y]; - - // Slopes are broken currently in master. - // Until R_DrawTiltedPlane is fixed we are just going to fill with a solid color. 
- - uint32_t *source = (uint32_t*)ds_source; - int source_width = 1 << ds_xbits; - int source_height = 1 << ds_ybits; - - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - - int count = x2 - x1 + 1; - while (count > 0) - { - *(dest++) = source[0]; - count--; - } + R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -512,12 +496,7 @@ void R_MapColoredPlane_C (int y, int x1) void R_MapColoredPlane_rgba(int y, int x1) { - uint32_t *dest = ylookup[y] + x1 + (uint32_t*)dc_destorg; - int count = (spanend[y] - x1 + 1); - uint32_t light = LightBgra::calc_light_multiplier(ds_light); - uint32_t color = LightBgra::shade_pal_index_simple(ds_color, light); - for (int i = 0; i < count; i++) - dest[i] = color; + R_DrawColoredSpan_rgba(y, x1, spanend[y]); } //========================================================================== From f56250b9107ab0446c040aca51419a7c1cd25479 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sun, 14 Aug 2016 05:10:34 +0200 Subject: [PATCH 094/100] Remove premultiplied alpha --- src/r_draw_rgba.h | 23 +++++++++++++++-------- src/textures/texture.cpp | 12 ++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 083258bf0..ca54f7263 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -473,9 +473,9 @@ public: { uint32_t alpha = APART(fg) + (APART(fg) >> 7); // 255 -> 256 uint32_t inv_alpha = 256 - alpha; - uint32_t red = MIN(RPART(fg) + (RPART(bg) * inv_alpha) / 256, 255); - uint32_t green = MIN(GPART(fg) + (GPART(bg) * inv_alpha) / 256, 255); - uint32_t blue = MIN(BPART(fg) + (BPART(bg) * inv_alpha) / 256, 255); + uint32_t red = MIN(RPART(fg) * alpha + (RPART(bg) * inv_alpha) / 256, 255); + uint32_t green = MIN(GPART(fg) * alpha + (GPART(bg) * inv_alpha) / 256, 255); + uint32_t blue = MIN(BPART(fg) * alpha + 
(BPART(bg) * inv_alpha) / 256, 255); return 0xff000000 | (red << 16) | (green << 8) | blue; } }; @@ -861,11 +861,18 @@ public: __m128i fg_lo = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); \ __m128i bg_hi = _mm_unpackhi_epi8(bg, _mm_setzero_si128()); \ __m128i bg_lo = _mm_unpacklo_epi8(bg, _mm_setzero_si128()); \ - __m128i m255 = _mm_set1_epi16(255); \ - __m128i inv_alpha_hi = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - __m128i inv_alpha_lo = _mm_sub_epi16(m255, _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3))); \ - inv_alpha_hi = _mm_add_epi16(inv_alpha_hi, _mm_srli_epi16(inv_alpha_hi, 7)); \ - inv_alpha_lo = _mm_add_epi16(inv_alpha_lo, _mm_srli_epi16(inv_alpha_lo, 7)); \ + __m128i m256 = _mm_set1_epi16(256); \ + __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_hi, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(fg_lo, _MM_SHUFFLE(3,3,3,3)), _MM_SHUFFLE(3,3,3,3)); \ + alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \ + alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \ + __m128i inv_alpha_hi = _mm_sub_epi16(m256, alpha_hi); \ + __m128i inv_alpha_lo = _mm_sub_epi16(m256, alpha_lo); \ + fg_hi = _mm_mullo_epi16(fg_hi, alpha_hi); \ + fg_hi = _mm_srli_epi16(fg_hi, 8); \ + fg_lo = _mm_mullo_epi16(fg_lo, alpha_lo); \ + fg_lo = _mm_srli_epi16(fg_lo, 8); \ + fg = _mm_packus_epi16(fg_lo, fg_hi); \ bg_hi = _mm_mullo_epi16(bg_hi, inv_alpha_hi); \ bg_hi = _mm_srli_epi16(bg_hi, 8); \ bg_lo = _mm_mullo_epi16(bg_lo, inv_alpha_lo); \ diff --git a/src/textures/texture.cpp b/src/textures/texture.cpp index 05574e9da..12e9d8549 100644 --- a/src/textures/texture.cpp +++ b/src/textures/texture.cpp @@ -354,22 +354,14 @@ void FTexture::GenerateBgraFromBitmap(const FBitmap &bitmap) { CreatePixelsBgraWithMipmaps(); - // Transpose and premultiply alpha + // Transpose const 
uint32_t *src = (const uint32_t *)bitmap.GetPixels(); uint32_t *dest = PixelsBgra.data(); for (int x = 0; x < Width; x++) { for (int y = 0; y < Height; y++) { - uint32_t p = src[x + y * Width]; - uint32_t red = RPART(p); - uint32_t green = GPART(p); - uint32_t blue = BPART(p); - uint32_t alpha = APART(p); - red = (red * alpha + 127) / 255; - green = (green * alpha + 127) / 255; - blue = (blue * alpha + 127) / 255; - dest[y + x * Height] = (alpha << 24) | (red << 16) | (green << 8) | blue; + dest[y + x * Height] = src[x + y * Width]; } } From b85e3b56e38e5c1a7edfbd0640889a0dbc6b0216 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 02:21:35 -0400 Subject: [PATCH 095/100] Establish QZDoom --- CMakeLists.txt | 4 ++-- src/version.h | 8 ++++---- src/win32/zdoom.rc | 16 ++++++++-------- wadsrc/CMakeLists.txt | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 80eac0c54..97c9410c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required( VERSION 2.8.7 ) -project(GZDoom) +project(QZDoom) if( COMMAND cmake_policy ) if( POLICY CMP0011 ) @@ -68,7 +68,7 @@ IF( NOT CMAKE_BUILD_TYPE ) ENDIF() set( ZDOOM_OUTPUT_DIR ${CMAKE_BINARY_DIR} CACHE PATH "Directory where zdoom.pk3 and the executable will be created." ) -set( ZDOOM_EXE_NAME "gzdoom" CACHE FILEPATH "Name of the executable to create" ) +set( ZDOOM_EXE_NAME "qzdoom" CACHE FILEPATH "Name of the executable to create" ) if( MSVC ) # Allow the user to use ZDOOM_OUTPUT_DIR as a single release point. 
# Use zdoom, zdoomd, zdoom64, and zdoomd64 for the binary names diff --git a/src/version.h b/src/version.h index 0b2fa22e7..9b2cd8f1a 100644 --- a/src/version.h +++ b/src/version.h @@ -85,12 +85,12 @@ const char *GetVersionString(); #define DYNLIGHT // This is so that derivates can use the same savegame versions without worrying about engine compatibility -#define GAMESIG "GZDOOM" -#define BASEWAD "gzdoom.pk3" +#define GAMESIG "QZDOOM" +#define BASEWAD "qzdoom.pk3" // More stuff that needs to be different for derivatives. -#define GAMENAME "GZDoom" -#define GAMENAMELOWERCASE "gzdoom" +#define GAMENAME "QZDoom" +#define GAMENAMELOWERCASE "qzdoom" #define FORUM_URL "http://forum.drdteam.org" #define BUGS_FORUM_URL "http://forum.drdteam.org/viewforum.php?f=24" diff --git a/src/win32/zdoom.rc b/src/win32/zdoom.rc index 5e2226c6d..1f760d8ea 100644 --- a/src/win32/zdoom.rc +++ b/src/win32/zdoom.rc @@ -72,13 +72,13 @@ BEGIN " BEGIN\r\n" " VALUE ""Comments"", ""Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. 
Includes code based on the Cajun Bot 0.97 by Martin Collberg.""\r\n" " VALUE ""CompanyName"", "" ""\r\n" - " VALUE ""FileDescription"", ""GZDoom""\r\n" + " VALUE ""FileDescription"", ""QZDoom""\r\n" " VALUE ""FileVersion"", RC_FILEVERSION2\r\n" - " VALUE ""InternalName"", ""GZDoom""\r\n" + " VALUE ""InternalName"", ""QZDoom""\r\n" " VALUE ""LegalCopyright"", ""Copyright \\u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al.""\r\n" " VALUE ""LegalTrademarks"", ""DoomR is a Registered Trademark of id Software, Inc.""\r\n" - " VALUE ""OriginalFilename"", ""gzdoom.exe""\r\n" - " VALUE ""ProductName"", ""GZDoom""\r\n" + " VALUE ""OriginalFilename"", ""qzdoom.exe""\r\n" + " VALUE ""ProductName"", ""QZDoom""\r\n" " VALUE ""ProductVersion"", RC_PRODUCTVERSION2\r\n" " END\r\n" " END\r\n" @@ -492,13 +492,13 @@ BEGIN BEGIN VALUE "Comments", "Thanks to id Software for creating DOOM and then releasing the source code. Thanks also to TeamTNT for creating BOOM, which ZDoom is partially based on. Includes code based on the Cajun Bot 0.97 by Martin Collberg." VALUE "CompanyName", " " - VALUE "FileDescription", "GZDoom" + VALUE "FileDescription", "QZDoom" VALUE "FileVersion", RC_FILEVERSION2 - VALUE "InternalName", "GZDoom" + VALUE "InternalName", "QZDoom" VALUE "LegalCopyright", "Copyright \u00A9 1993-1996 id Software, 1998-2010 Randy Heit, 2002-2010 Christoph Oelckers, et al." VALUE "LegalTrademarks", "DoomR is a Registered Trademark of id Software, Inc." 
- VALUE "OriginalFilename", "gzdoom.exe" - VALUE "ProductName", "GZDoom" + VALUE "OriginalFilename", "qzdoom.exe" + VALUE "ProductName", "QZDoom" VALUE "ProductVersion", RC_PRODUCTVERSION2 END END diff --git a/wadsrc/CMakeLists.txt b/wadsrc/CMakeLists.txt index 80189a328..5a85840e0 100644 --- a/wadsrc/CMakeLists.txt +++ b/wadsrc/CMakeLists.txt @@ -1,3 +1,3 @@ cmake_minimum_required( VERSION 2.8.7 ) -add_pk3(gzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static) +add_pk3(qzdoom.pk3 ${CMAKE_CURRENT_SOURCE_DIR}/static) From 004c7de89bb812cc9f01b7f86af242c3f11fe5c6 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 04:03:39 -0400 Subject: [PATCH 096/100] Part 1 of code merge --- src/gl/renderer/gl_colormap.h | 1 - src/r_data/colormaps.cpp | 2 +- src/r_data/colormaps.h | 8 ++++---- src/r_defs.h | 4 ++-- src/r_draw.cpp | 14 +++++++------- src/r_draw.h | 12 ++++++------ src/r_draw_rgba.cpp | 2 +- src/r_draw_rgba.h | 2 +- src/r_main.cpp | 2 +- src/r_main.h | 2 +- src/r_swrenderer.cpp | 2 +- src/r_things.cpp | 8 ++++---- src/r_things.h | 2 +- src/win32/win32gliface.cpp | 5 +++-- src/win32/win32gliface.h | 2 +- 15 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/gl/renderer/gl_colormap.h b/src/gl/renderer/gl_colormap.h index 2122b1248..d66950309 100644 --- a/src/gl/renderer/gl_colormap.h +++ b/src/gl/renderer/gl_colormap.h @@ -75,5 +75,4 @@ struct FColormap }; - #endif diff --git a/src/r_data/colormaps.cpp b/src/r_data/colormaps.cpp index ffaaa38ac..3bfc89b4b 100644 --- a/src/r_data/colormaps.cpp +++ b/src/r_data/colormaps.cpp @@ -71,7 +71,7 @@ struct FakeCmap }; TArray fakecmaps; -FColormap realcolormaps; +FSWColormap realcolormaps; size_t numfakecmaps; diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index bda6a5ea4..039a85189 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -9,10 +9,10 @@ void R_DeinitColormaps (); DWORD R_ColormapNumForName(const char *name); // killough 4/4/98 void R_SetDefaultColormap (const char 
*name); // [RH] change normal fadetable DWORD R_BlendForColormap (DWORD map); // [RH] return calculated blend for a colormap -extern FColormap realcolormaps; // [RH] make the colormaps externally visible +extern FSWColormap realcolormaps; // [RH] make the colormaps externally visible extern size_t numfakecmaps; -struct FColormap +struct FSWColormap { BYTE *Maps = nullptr; PalEntry Color = 0xffffffff; @@ -20,7 +20,7 @@ struct FColormap int Desaturate = 0; }; -struct FDynamicColormap : FColormap +struct FDynamicColormap : FSWColormap { void ChangeFade (PalEntry fadecolor); void ChangeColor (PalEntry lightcolor, int desaturate); @@ -47,7 +47,7 @@ enum }; -struct FSpecialColormap : FColormap +struct FSpecialColormap : FSWColormap { FSpecialColormap() { diff --git a/src/r_defs.h b/src/r_defs.h index f4bfbcdcf..7d7ad7eab 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1509,13 +1509,13 @@ struct FMiniBSP // typedef BYTE lighttable_t; // This could be wider for >8 bit display. -struct FColormap; +struct FSWColormap; // This encapsulates the fields of vissprite_t that can be altered by AlterWeaponSprite struct visstyle_t { int ColormapNum; // Which colormap is rendered - FColormap *BaseColormap; // Base colormap used together with ColormapNum + FSWColormap *BaseColormap; // Base colormap used together with ColormapNum float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index 682ed4668..f255352f5 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -100,7 +100,7 @@ void (*R_DrawFogBoundary)(int x1, int x2, short *uclip, short *dclip); void (*R_MapTiltedPlane)(int y, int x1); void (*R_MapColoredPlane)(int y, int x1); void (*R_DrawParticle)(vissprite_t *); -void (*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +void (*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade); void (*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); fixed_t (*tmvline1_add)(); void (*tmvline4_add)(); 
@@ -146,7 +146,7 @@ extern "C" { int dc_pitch=0xABadCafe; // [RH] Distance between rows lighttable_t* dc_colormap; -FColormap *dc_fcolormap; +FSWColormap *dc_fcolormap; ShadeConstants dc_shade_constants; fixed_t dc_light; int dc_x; @@ -1032,7 +1032,7 @@ int ds_y; int ds_x1; int ds_x2; -FColormap* ds_fcolormap; +FSWColormap* ds_fcolormap; lighttable_t* ds_colormap; ShadeConstants ds_shade_constants; dsfixed_t ds_light; @@ -2413,10 +2413,10 @@ void R_InitColumnDrawers () R_DrawParticle = R_DrawParticle_C; #ifdef X86_ASM - R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabA(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; R_DrawSlab = R_DrawSlabA; #else - R_SetupDrawSlab = [](FColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; + R_SetupDrawSlab = [](FSWColormap *colormap, float light, int shade) { R_SetupDrawSlabC(colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT)); }; R_DrawSlab = R_DrawSlabC; #endif @@ -2806,7 +2806,7 @@ void R_SetTranslationMap(lighttable_t *translation) } } -void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) +void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) { dc_fcolormap = base_colormap; if (r_swtruecolor) @@ -2830,7 +2830,7 @@ void R_SetColorMapLight(FColormap *base_colormap, float light, int shade) } } -void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade) +void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) { ds_fcolormap = base_colormap; if (r_swtruecolor) diff --git a/src/r_draw.h b/src/r_draw.h index 591ae0b5f..204f2a493 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -31,7 +31,7 @@ extern "C" int fuzzoffset[FUZZTABLE + 1]; // [RH] +1 for the assembly 
routine extern "C" int fuzzpos; extern "C" int fuzzviewheight; -struct FColormap; +struct FSWColormap; struct ShadeConstants { @@ -52,7 +52,7 @@ extern "C" int ylookup[MAXHEIGHT]; extern "C" int dc_pitch; // [RH] Distance between rows extern "C" lighttable_t*dc_colormap; -extern "C" FColormap *dc_fcolormap; +extern "C" FSWColormap *dc_fcolormap; extern "C" ShadeConstants dc_shade_constants; extern "C" fixed_t dc_light; extern "C" int dc_x; @@ -288,7 +288,7 @@ void R_FillColumnP_C (void); void R_FillColumnHorizP_C (void); void R_FillSpan_C (void); -extern void(*R_SetupDrawSlab)(FColormap *base_colormap, float light, int shade); +extern void(*R_SetupDrawSlab)(FSWColormap *base_colormap, float light, int shade); extern void(*R_DrawSlab)(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); #ifdef X86_ASM @@ -303,7 +303,7 @@ extern "C" int ds_y; extern "C" int ds_x1; extern "C" int ds_x2; -extern "C" FColormap* ds_fcolormap; +extern "C" FSWColormap* ds_fcolormap; extern "C" lighttable_t* ds_colormap; extern "C" ShadeConstants ds_shade_constants; extern "C" dsfixed_t ds_light; @@ -374,10 +374,10 @@ void maskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_ void transmaskwallscan (int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int col)=R_GetColumn); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) -void R_SetColorMapLight(FColormap *base_colormap, float light, int shade); +void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); // Same as R_SetColorMapLight, but for ds_colormap and ds_light -void R_SetDSColorMapLight(FColormap *base_colormap, float light, int shade); +void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); void R_SetTranslationMap(lighttable_t *translation); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 69ebfeb84..0d86ead47 
100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -2751,7 +2751,7 @@ static ShadeConstants slab_rgba_shade_constants; static const BYTE *slab_rgba_colormap; static fixed_t slab_rgba_light; -void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade) +void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade) { slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index ca54f7263..df3d0f233 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -97,7 +97,7 @@ void R_FillSpan_rgba(); void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); void R_DrawColoredSpan_rgba(int y, int x1, int x2); -void R_SetupDrawSlab_rgba(FColormap *base_colormap, float light, int shade); +void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade); void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p); void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip); diff --git a/src/r_main.cpp b/src/r_main.cpp index 5ff80b101..ba02a7c60 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -120,7 +120,7 @@ double FocalLengthX; double FocalLengthY; FDynamicColormap*basecolormap; // [RH] colormap currently drawing with int fixedlightlev; -FColormap *fixedcolormap; +FSWColormap *fixedcolormap; FSpecialColormap *realfixedcolormap; double WallTMapScale2; diff --git a/src/r_main.h b/src/r_main.h index fa8fe0bb1..8d1867526 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -106,7 +106,7 @@ extern double r_SpriteVisibility; extern int r_actualextralight; extern bool foggy; extern int fixedlightlev; -extern FColormap* fixedcolormap; +extern FSWColormap* fixedcolormap; extern 
FSpecialColormap*realfixedcolormap; diff --git a/src/r_swrenderer.cpp b/src/r_swrenderer.cpp index 034275101..9bc8e4b96 100644 --- a/src/r_swrenderer.cpp +++ b/src/r_swrenderer.cpp @@ -340,7 +340,7 @@ void FSoftwareRenderer::RenderTextureView (FCanvasTexture *tex, AActor *viewpoin // curse Doom's overuse of global variables in the renderer. // These get clobbered by rendering to a camera texture but they need to be preserved so the final rendering can be done with the correct palette. - FColormap *savecolormap = fixedcolormap; + FSWColormap *savecolormap = fixedcolormap; FSpecialColormap *savecm = realfixedcolormap; DAngle savedfov = FieldOfView; diff --git a/src/r_things.cpp b/src/r_things.cpp index 013fc7152..6f1fb2700 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -1481,7 +1481,7 @@ void R_DrawPSprite(DPSprite *pspr, AActor *owner, float bobx, float boby, double if (camera->Inventory != nullptr) { BYTE oldcolormapnum = vis->Style.ColormapNum; - FColormap *oldcolormap = vis->Style.BaseColormap; + FSWColormap *oldcolormap = vis->Style.BaseColormap; camera->Inventory->AlterWeaponSprite (&vis->Style); if (vis->Style.BaseColormap != oldcolormap || vis->Style.ColormapNum != oldcolormapnum) { @@ -1960,7 +1960,7 @@ void R_DrawSprite (vissprite_t *spr) int r1, r2; short topclip, botclip; short *clip1, *clip2; - FColormap *colormap = spr->Style.BaseColormap; + FSWColormap *colormap = spr->Style.BaseColormap; int colormapnum = spr->Style.ColormapNum; F3DFloor *rover; FDynamicColormap *mybasecolormap; @@ -2486,7 +2486,7 @@ void R_ProjectParticle (particle_t *particle, const sector_t *sector, int shade, int x1, x2, y1, y2; vissprite_t* vis; sector_t* heightsec = NULL; - FColormap* map; + FSWColormap* map; // [ZZ] Particle not visible through the portal plane if (CurrentPortal && !!P_PointOnLineSide(particle->Pos, CurrentPortal->dst)) @@ -2785,7 +2785,7 @@ extern double BaseYaspectMul;; void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, const FVector3 
&dasprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, - FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) + FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; diff --git a/src/r_things.h b/src/r_things.h index 13f89574b..cbe34015f 100644 --- a/src/r_things.h +++ b/src/r_things.h @@ -144,7 +144,7 @@ enum { DVF_OFFSCREEN = 1, DVF_SPANSONLY = 2, DVF_MIRRORED = 4 }; void R_DrawVoxel(const FVector3 &viewpos, FAngle viewangle, const FVector3 &sprpos, DAngle dasprang, fixed_t daxscale, fixed_t dayscale, struct FVoxel *voxobj, - FColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); + FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags); void R_ClipVisSprite (vissprite_t *vis, int xl, int xh); diff --git a/src/win32/win32gliface.cpp b/src/win32/win32gliface.cpp index 7ca001e1e..59ef471d3 100644 --- a/src/win32/win32gliface.cpp +++ b/src/win32/win32gliface.cpp @@ -346,7 +346,8 @@ bool Win32GLVideo::GoFullscreen(bool yes) // //========================================================================== -DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool fs, DFrameBuffer *old) + +DFrameBuffer *Win32GLVideo::CreateFrameBuffer(int width, int height, bool bgra, bool fs, DFrameBuffer *old) { Win32GLFrameBuffer *fb; @@ -860,7 +861,7 @@ IMPLEMENT_ABSTRACT_CLASS(Win32GLFrameBuffer) // //========================================================================== -Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, bool fullscreen) : BaseWinFB(width, height) +Win32GLFrameBuffer::Win32GLFrameBuffer(void *hMonitor, int width, int height, int bits, int refreshHz, 
bool fullscreen) : BaseWinFB(width, height, false) { m_Width = width; m_Height = height; diff --git a/src/win32/win32gliface.h b/src/win32/win32gliface.h index 6320e2903..87eb10de6 100644 --- a/src/win32/win32gliface.h +++ b/src/win32/win32gliface.h @@ -38,7 +38,7 @@ public: void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox); bool GoFullscreen(bool yes); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); virtual bool SetResolution (int width, int height, int bits); void DumpAdapters(); bool InitHardware (HWND Window, int multisample); From 3ebf8c7e746ec0f0b9bab827e7b2fa9a0f91bd30 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 06:28:39 -0400 Subject: [PATCH 097/100] More code fixes - now it compiles. --- src/r_data/colormaps.h | 2 +- src/r_defs.h | 1 + src/v_video.cpp | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/r_data/colormaps.h b/src/r_data/colormaps.h index 039a85189..ca1574893 100644 --- a/src/r_data/colormaps.h +++ b/src/r_data/colormaps.h @@ -1,7 +1,7 @@ #ifndef __RES_CMAP_H #define __RES_CMAP_H -struct FColormap; +struct FSWColormap; void R_InitColormaps (); void R_DeinitColormaps (); diff --git a/src/r_defs.h b/src/r_defs.h index 7d7ad7eab..50d514fc5 100644 --- a/src/r_defs.h +++ b/src/r_defs.h @@ -1516,6 +1516,7 @@ struct visstyle_t { int ColormapNum; // Which colormap is rendered FSWColormap *BaseColormap; // Base colormap used together with ColormapNum + lighttable_t *colormap; // [SP] Restored from GZDoom - will this work? 
float Alpha; FRenderStyle RenderStyle; }; diff --git a/src/v_video.cpp b/src/v_video.cpp index d07fdc61a..1bef7df40 100644 --- a/src/v_video.cpp +++ b/src/v_video.cpp @@ -71,6 +71,7 @@ FRenderer *Renderer; IMPLEMENT_ABSTRACT_CLASS (DCanvas) IMPLEMENT_ABSTRACT_CLASS (DFrameBuffer) +EXTERN_CVAR (Bool, swtruecolor) #if defined(_DEBUG) && defined(_M_IX86) #define DBGBREAK { __asm int 3 } @@ -813,7 +814,7 @@ void DSimpleCanvas::Resize(int width, int height) Pitch = width + MAX(0, CPU.DataL1LineSize - 8); } } - int bytes_per_pixel = bgra ? 4 : 1; + int bytes_per_pixel = swtruecolor ? 4 : 1; MemBuffer = new BYTE[Pitch * height * bytes_per_pixel]; memset (MemBuffer, 0, Pitch * height * bytes_per_pixel); } From b0029fcd1ebb67e7ff23fdebd72c51b13a17a55f Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 06:38:08 -0400 Subject: [PATCH 098/100] Set version to 0.0 (prerelease), set render defaults for true-color software renderer since that is the focus of this project --- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- src/version.h | 8 ++++---- src/win32/hardware.cpp | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 20a93ce25..4f91fd369 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -106,7 +106,7 @@ EXTERN_CVAR(Bool, ticker ) EXTERN_CVAR(Bool, vid_vsync) EXTERN_CVAR(Bool, vid_hidpi) -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. 
diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index 2c0ba617e..b69a0d3d8 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -321,7 +321,7 @@ CUSTOM_CVAR (Int, vid_maxfps, 200, CVAR_ARCHIVE | CVAR_GLOBALCONFIG) extern int NewWidth, NewHeight, NewBits, DisplayBits; -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. diff --git a/src/version.h b/src/version.h index 9b2cd8f1a..f3706b1de 100644 --- a/src/version.h +++ b/src/version.h @@ -41,12 +41,12 @@ const char *GetVersionString(); /** Lots of different version numbers **/ -#define VERSIONSTR "2.2pre" +#define VERSIONSTR "0.0pre" // The version as seen in the Windows resource -#define RC_FILEVERSION 2,1,9999,0 -#define RC_PRODUCTVERSION 2,1,9999,0 -#define RC_PRODUCTVERSION2 "2.2pre" +#define RC_FILEVERSION 0,0,9999,0 +#define RC_PRODUCTVERSION 0,0,9999,0 +#define RC_PRODUCTVERSION2 "0.0pre" // Version identifier for network games. 
// Bump it every time you do a release unless you're certain you diff --git a/src/win32/hardware.cpp b/src/win32/hardware.cpp index cc55dd400..3cf941307 100644 --- a/src/win32/hardware.cpp +++ b/src/win32/hardware.cpp @@ -72,7 +72,7 @@ int currentrenderer = -1; bool changerenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer @@ -358,7 +358,7 @@ void I_RestoreWindowedPos () extern int NewWidth, NewHeight, NewBits, DisplayBits; -CUSTOM_CVAR(Bool, swtruecolor, false, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) +CUSTOM_CVAR(Bool, swtruecolor, true, CVAR_ARCHIVE|CVAR_GLOBALCONFIG|CVAR_NOINITCALL) { // Strictly speaking this doesn't require a mode switch, but it is the easiest // way to force a CreateFramebuffer call without a lot of refactoring. From 842558384a74c9f427fb41aaeab426f4fadbd7c7 Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 14 Sep 2016 07:33:31 -0400 Subject: [PATCH 099/100] Forgot to set vid_renderer defaults for Linux and Mac. 
--- src/posix/cocoa/i_video.mm | 2 +- src/posix/sdl/hardware.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/posix/cocoa/i_video.mm b/src/posix/cocoa/i_video.mm index 4f91fd369..ba3a3e27e 100644 --- a/src/posix/cocoa/i_video.mm +++ b/src/posix/cocoa/i_video.mm @@ -134,7 +134,7 @@ CUSTOM_CVAR(Bool, vid_autoswitch, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_ static int s_currentRenderer; -CUSTOM_CVAR(Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR(Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer diff --git a/src/posix/sdl/hardware.cpp b/src/posix/sdl/hardware.cpp index b69a0d3d8..18c7ad737 100644 --- a/src/posix/sdl/hardware.cpp +++ b/src/posix/sdl/hardware.cpp @@ -65,7 +65,7 @@ void I_RestartRenderer(); int currentrenderer; // [ZDoomGL] -CUSTOM_CVAR (Int, vid_renderer, 1, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) +CUSTOM_CVAR (Int, vid_renderer, 0, CVAR_ARCHIVE | CVAR_GLOBALCONFIG | CVAR_NOINITCALL) { // 0: Software renderer // 1: OpenGL renderer From d9e60644b1e256c6e8bed445e070a3f17598820a Mon Sep 17 00:00:00 2001 From: raa-eruanna Date: Wed, 21 Sep 2016 01:08:00 -0400 Subject: [PATCH 100/100] Some Linux SDL fixes. Will have to do this for Mac, later, too. 
--- src/posix/sdl/sdlglvideo.cpp | 4 ++-- src/posix/sdl/sdlglvideo.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/posix/sdl/sdlglvideo.cpp b/src/posix/sdl/sdlglvideo.cpp index d8c00f236..e581cfde9 100644 --- a/src/posix/sdl/sdlglvideo.cpp +++ b/src/posix/sdl/sdlglvideo.cpp @@ -163,7 +163,7 @@ bool SDLGLVideo::NextMode (int *width, int *height, bool *letterbox) return false; } -DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool fullscreen, DFrameBuffer *old) +DFrameBuffer *SDLGLVideo::CreateFrameBuffer (int width, int height, bool bgra, bool fullscreen, DFrameBuffer *old) { static int retry = 0; static int owidth, oheight; @@ -315,7 +315,7 @@ bool SDLGLVideo::InitHardware (bool allowsoftware, int multisample) // FrameBuffer implementation ----------------------------------------------- SDLGLFB::SDLGLFB (void *, int width, int height, int, int, bool fullscreen) - : DFrameBuffer (width, height) + : DFrameBuffer (width, height, false) { int i; diff --git a/src/posix/sdl/sdlglvideo.h b/src/posix/sdl/sdlglvideo.h index d8ce9005d..3b84f83c4 100644 --- a/src/posix/sdl/sdlglvideo.h +++ b/src/posix/sdl/sdlglvideo.h @@ -21,7 +21,7 @@ class SDLGLVideo : public IVideo EDisplayType GetDisplayType () { return DISPLAY_Both; } void SetWindowedScale (float scale); - DFrameBuffer *CreateFrameBuffer (int width, int height, bool fs, DFrameBuffer *old); + DFrameBuffer *CreateFrameBuffer (int width, int height, bool bgra, bool fs, DFrameBuffer *old); void StartModeIterator (int bits, bool fs); bool NextMode (int *width, int *height, bool *letterbox);